{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.01079120859503746, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.009245744906365871, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0045633516274392605, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.004790416918694973, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0047902269288897514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.001933127990923822, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.010670164600014687, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.009171019308269024, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.005036350339651108, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.004433959256857634, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.004681136924773455, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.004880156833678484, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.004432560410350561, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.0026396396569907665, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.002021762076765299, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0026638221461325884, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.0018418062245473266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.0015898521523922682, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0017980447737500072, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.001531733782030642, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0018424219451844692, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0017979135736823082, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0015264308312907815, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0015154598513618112, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.010728836990892887, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.009255073964595795, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.004467173013836145, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.004665913991630077, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.004665711894631386, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0017069519963115454, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.011278248392045498, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.009164388291537762, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.004890987649559975, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.00429148692637682, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.004506899509578943, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0048247952945530415, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.004289050120860338, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.002439638366922736, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.0017630304209887981, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.002431283937767148, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.0015536937862634659, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0012478590942919254, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0015006480971351266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0011748145334422588, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0015286421403288841, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0015005320310592651, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0011274735443294048, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.001153447199612856, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.12604594230651855, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07464069128036499, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04577437788248062, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05213453993201256, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0520951971411705, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.02303723618388176, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.08319473266601562, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06878971308469772, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.05944050848484039, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03360838443040848, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0399225689470768, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04521043598651886, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.033291589468717575, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.025426741689443588, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.02322428859770298, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.023352181538939476, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.013576418161392212, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.012543192133307457, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010103004053235054, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.00840720534324646, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012034566141664982, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.010048446245491505, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006939687766134739, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006688016466796398, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11848125606775284, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.0742579847574234, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.04610557481646538, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.05050479248166084, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04883675277233124, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.023832548409700394, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07634739577770233, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06788267940282822, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05625179782509804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03357461094856262, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03632734715938568, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.039033111184835434, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03278522565960884, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02443394623696804, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02204931154847145, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01975841633975506, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.014109678566455841, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013125498779118061, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011625455692410469, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.01008669100701809, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010930098593235016, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01151492353528738, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007754279300570488, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.008900157175958157, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.12519489228725433, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.11605411022901535, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.11324220895767212, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.10282273590564728, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.05613372474908829, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.05348646640777588, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.06269587576389313, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0578765831887722, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.056960973888635635, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05092410370707512, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.04854152724146843, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03178427368402481, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.02773444354534149, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.027030041441321373, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.026856958866119385, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.015990890562534332, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.014572816900908947, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01450331136584282, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01363953948020935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.013535238802433014, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.008891928941011429, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.009742503054440022, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.008627617731690407, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007537452038377523, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1477857232093811, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13889530301094055, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13626447319984436, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.12376870214939117, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06679093837738037, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06417573988437653, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07398047298192978, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06833633035421371, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.067527174949646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.061024270951747894, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.05809226632118225, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.037274353206157684, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03237980604171753, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.031721293926239014, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.0315653532743454, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.018588023260235786, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.01625085063278675, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.016181612387299538, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01513309869915247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.015035690739750862, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.009738252498209476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.009710869751870632, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.009472930803894997, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.006376879755407572, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.0794590413570404, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.06719563156366348, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.05998573824763298, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.052703775465488434, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.03516847640275955, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.02970942296087742, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.05061337724328041, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.04238150268793106, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.03689465671777725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.028885340318083763, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.027898019179701805, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.023856772109866142, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.02028476819396019, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.017617126926779747, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.016967106610536575, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.012536142952740192, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.010446242988109589, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.010240290313959122, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.009430141188204288, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.009112173691391945, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.007995205000042915, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.008211107924580574, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.007388048339635134, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.006994161754846573, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.02080054208636284, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.013692399486899376, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0076781525276601315, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.008210774511098862, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.007993939332664013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0034052273258566856, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.014339144341647625, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.012975999154150486, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.009706181474030018, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0062962365336716175, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.006761157885193825, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.007241943385452032, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.006178666837513447, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.0040168133564293385, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.0033223628997802734, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0036383552942425013, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.002335925120860338, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.0020562445279210806, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.002057891571894288, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.001644659205339849, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0019718841649591923, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0020401799120008945, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0012094209669157863, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0014976600650697947, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.0183106642216444, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.012279052287340164, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.006630728952586651, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.007098561152815819, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.006940491497516632, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0027603893540799618, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.013322083279490471, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.011775949969887733, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.00847938284277916, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.005679432302713394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.006155414506793022, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0066613261587917805, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.005599453113973141, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.003475792706012726, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.0027643137145787477, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0033372000325471163, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.001995381433516741, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.00170006207190454, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.001783206476829946, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0013617955846711993, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0017834109021350741, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0017706513172015548, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0010168825974687934, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0012462785234674811, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.14749057590961456, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0953642874956131, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06782633066177368, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06520062685012817, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.06007370352745056, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03408694267272949, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.08903783559799194, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.08052323758602142, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.0700913518667221, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.041731368750333786, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.04300183057785034, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.045400455594062805, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.03848867118358612, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.029233675450086594, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.026606768369674683, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.022669285535812378, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.015506042167544365, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.014417342841625214, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.012021242640912533, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.010055349208414555, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.011702767573297024, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01144123263657093, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.007760536856949329, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007415633648633957, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.16610494256019592, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.13518965244293213, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.12266102433204651, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.09750504046678543, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0752413347363472, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0627007931470871, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09558626264333725, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0848553478717804, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.0787934809923172, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.055205777287483215, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0513213649392128, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04914763197302818, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04131317511200905, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.03703423961997032, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.0359659418463707, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02486463449895382, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.020592445507645607, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02012024261057377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.017078200355172157, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.016374995931982994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01384303905069828, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01494605652987957, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01219762023538351, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011809245683252811, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16760198771953583, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15771350264549255, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1546677201986313, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1399254947900772, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07731852680444717, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07413488626480103, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08580531179904938, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07897254824638367, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07807867228984833, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07013067603111267, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06665725260972977, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0438670888543129, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03823366388678551, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03751283511519432, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03734534978866577, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022180184721946716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02046392112970352, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.0203880425542593, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019175365567207336, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01907108910381794, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012583530507981777, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013914749026298523, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012306413613259792, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01107020489871502, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.20136122405529022, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.19043892621994019, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1871633529663086, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.16962672770023346, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09337486326694489, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08983296900987625, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10319662094116211, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09519895166158676, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09432286024093628, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08496277034282684, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08074599504470825, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.052648164331912994, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.045747824013233185, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04497045278549194, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04480253532528877, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.026386313140392303, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.023908400908112526, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.023839933797717094, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02233077771961689, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.022219687700271606, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014431770890951157, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015487619675695896, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.014128315262496471, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011655333451926708, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.024975255131721497, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.024206824600696564, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.008117430843412876, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.007327215280383825, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.00616056052967906, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.003836372634395957, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.023828130215406418, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.023412765935063362, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.0063441721722483635, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.005563234444707632, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.005502199288457632, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.005258940160274506, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.005091345869004726, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.0030613457784056664, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.002583590103313327, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.002684529172256589, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.001979124965146184, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.0013090191641822457, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0018800022080540657, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.0011525026056915522, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0017916479846462607, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.001803944818675518, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.0009129414684139192, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0009403919102624059, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.0566590279340744, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.04353601858019829, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.036549899727106094, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.03263227641582489, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.024149306118488312, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0179717019200325, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.033824071288108826, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.030763372778892517, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.02663566544651985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.01888813078403473, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.018533919006586075, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.017127634957432747, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.014667709358036518, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.011702368035912514, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.010897028259932995, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.008556852117180824, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.006204333622008562, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.0058596874587237835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.005198752507567406, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.0046559651382267475, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.004462576471269131, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004452510736882687, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0033025341108441353, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0029767046216875315, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.05868585407733917, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.04297492653131485, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.03386026620864868, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.030943404883146286, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.024279529228806496, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.01660062000155449, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03589137643575668, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.032597169280052185, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.02734885737299919, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.018667208030819893, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.01869734190404415, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.018057772889733315, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01550220511853695, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.011830197647213936, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.010793964378535748, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009058129042387009, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.006352631375193596, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.005893290042877197, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.005299780052155256, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.004595666192471981, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004729635082185268, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00475359009578824, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.003308624494820833, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003211639355868101, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.16886594891548157, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.13858138024806976, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1267867535352707, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.11095861345529556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0753648653626442, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.06275512278079987, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0939568281173706, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.08520203828811646, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.08015919476747513, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0597863607108593, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.05666801333427429, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.047822821885347366, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04071267321705818, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.036231305450201035, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.03512967377901077, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.023957006633281708, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.018705343827605247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01820918545126915, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.015579945407807827, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.014790472574532032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012533819302916527, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012011219747364521, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.010556278750300407, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007651148363947868, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.1662057489156723, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.14490707218647003, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.13547268509864807, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.11639294028282166, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.07647258788347244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.06714474409818649, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09446226805448532, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0856490507721901, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.07938813418149948, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06317455321550369, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.05900679901242256, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04895871505141258, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0418630950152874, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.03774208948016167, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.03670092672109604, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02502431720495224, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.021138697862625122, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.020668182522058487, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01877923309803009, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.018160192295908928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01430099830031395, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015436316840350628, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.012802175246179104, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012357552535831928, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.19105593860149384, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1794387698173523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17587698996067047, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.15849930047988892, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08921168744564056, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08519851416349411, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09912575036287308, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09132546186447144, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09033828973770142, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08016330003738403, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07577232271432877, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05068989098072052, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04381662234663963, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04287005215883255, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04265445098280907, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02537349984049797, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02247530035674572, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.022380370646715164, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02071213349699974, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.020578503608703613, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013728529214859009, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014157431200146675, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.013355202041566372, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010176416486501694, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23165902495384216, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2180001139640808, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2138855755329132, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19282129406929016, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10817326605319977, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10344778746366501, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11983521282672882, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11060040444135666, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.10948729515075684, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09734931588172913, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09189657866954803, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06103143468499184, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05282512307167053, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.051755111664533615, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05150288715958595, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.030479522421956062, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02660011127591133, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02648267149925232, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02441377192735672, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.024254050105810165, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016093013808131218, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015975279733538628, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015647700056433678, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01058848388493061, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.18258149921894073, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.15775372087955475, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1481790542602539, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.12828628718852997, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.0819409191608429, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.07248146831989288, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.09822803735733032, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.08964818716049194, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.08511453866958618, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.06752445548772812, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.06312288343906403, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.04996095597743988, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.042923830449581146, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.0394890159368515, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.038649484515190125, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0250751543790102, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.0209489855915308, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02063780650496483, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.018211932852864265, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.017665063962340355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.013621511869132519, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.013936028815805912, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.012448587454855442, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010076997801661491, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.041594937443733215, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.0341922752559185, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.02998943068087101, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.026380419731140137, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.018353495746850967, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.014772842638194561, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.02494819462299347, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.02266457863152027, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.019663048908114433, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.014892438426613808, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.014379444532096386, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.012678929604589939, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.010825672186911106, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.008906306698918343, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.008399453945457935, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.00635361997410655, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.004752210341393948, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.004518453031778336, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.00409374525770545, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.003756130114197731, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0033737262710928917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.003379982430487871, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0026478164363652468, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.002330376300960779, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.04246797040104866, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.033709339797496796, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.02836700528860092, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.02501772902905941, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0181938074529171, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.013823218643665314, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.026216691359877586, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.023721233010292053, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.019970323890447617, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.014611993916332722, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.01438639871776104, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01328970119357109, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.011287035420536995, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.008801436983048916, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.008121166378259659, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.006647787056863308, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.004637218080461025, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.004338820930570364, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.003943838644772768, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0034871946554630995, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.003454532939940691, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0033398682717233896, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0024883151054382324, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0021620842162519693, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.18864645063877106, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.15797960758209229, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1459873616695404, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.12727224826812744, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.08475729823112488, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.07234307378530502, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10409123450517654, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09537097811698914, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.08957695960998535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.06844286620616913, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0645182728767395, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05287685990333557, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04547243192791939, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04071931168437004, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.03951045125722885, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02641124278306961, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.020871341228485107, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.020350094884634018, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.017546396702528, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.016705574467778206, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013596277683973312, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013183231465518475, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01157503854483366, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008173332549631596, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.19581182301044464, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17086002230644226, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1617928445339203, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1331963837146759, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09023404121398926, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0805949792265892, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10661068558692932, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09703952819108963, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.0929022952914238, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0722258910536766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0651366114616394, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.054874613881111145, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.047027330845594406, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.0440291166305542, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04332913085818291, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.027700094506144524, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.023986250162124634, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02365843765437603, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020677274093031883, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02020479366183281, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015419969335198402, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016561903059482574, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014312537387013435, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012853392399847507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.22392985224723816, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20988798141479492, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2056455910205841, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.18526436388492584, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10505020618438721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10011041164398193, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11667020618915558, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1076788455247879, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.10642349720001221, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09413128346204758, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08882948756217957, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05957762897014618, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.051608968526124954, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05043606460094452, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.050170086324214935, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.029812484979629517, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02622772380709648, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02610582485795021, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02405848354101181, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.023891407996416092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015920907258987427, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016228782013058662, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015430972911417484, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011303836479783058, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2564595341682434, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24065107107162476, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23598547279834747, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21288400888442993, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12041729688644409, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11489927023649216, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1337788701057434, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12328702211380005, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12192786484956741, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10801049321889877, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10206261277198792, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06825165450572968, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.058977220207452774, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05768977850675583, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.057382818311452866, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03412787243723869, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.029678653925657272, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.029542861506342888, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027173161506652832, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026981331408023834, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018129907548427582, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017876723781228065, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017603285610675812, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0118943490087986, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.21675726771354675, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19070754945278168, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18106478452682495, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.15863358974456787, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09814859926700592, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08845825493335724, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11571011692285538, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10570679605007172, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10131833702325821, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08245562762022018, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07748518884181976, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05879849195480347, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.050576332956552505, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04719790816307068, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04639129713177681, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02956162765622139, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02484932169318199, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.024542633444070816, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02182602509856224, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.021291734650731087, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016038907691836357, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016158897429704666, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01488727331161499, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01147253904491663, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.05624882131814957, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.04673685505986214, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.04127592220902443, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.036482103168964386, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.02495158649981022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.02034095488488674, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.034029584378004074, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.030711689963936806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.02667386643588543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02050868608057499, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.019864393398165703, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.017363112419843674, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.014730810187757015, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.012151623144745827, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.011474563740193844, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.008721781894564629, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.006553805433213711, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.006244380492717028, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.005717093590646982, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.005283567123115063, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0046581970527768135, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004721521399915218, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.003672192804515362, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003369074547663331, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.05584239214658737, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0448356531560421, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.03849327936768532, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.034144509583711624, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.024065326899290085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.018837345764040947, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03394309803843498, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.030661150813102722, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.02635926939547062, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.01957930251955986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.01919003762304783, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01732253096997738, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.014656837098300457, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.0117182033136487, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.0109223248437047, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.00871064979583025, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.006276839412748814, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.005930435843765736, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.005421918351203203, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0048976074904203415, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004584881942719221, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004560218658298254, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.003432217054069042, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003137887455523014, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.20189283788204193, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17179526388645172, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16030840575695038, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.14037442207336426, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0913344994187355, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.07927674055099487, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11026289314031601, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10129299014806747, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09601686894893646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07475163042545319, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07039141654968262, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05609692260622978, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04842488467693329, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04389144107699394, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04276401177048683, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.028066474944353104, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02261313982307911, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.022143954411149025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019267061725258827, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.018504776060581207, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014542373828589916, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014303652569651604, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01262245886027813, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00914738792926073, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.1857185661792755, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1642351895570755, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15641774237155914, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.13303910195827484, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.08547289669513702, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.077039934694767, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10182949155569077, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09160344302654266, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.08802693337202072, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07068638503551483, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0651177242398262, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.052429959177970886, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04458831250667572, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04182535037398338, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04116600379347801, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.026555519551038742, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02291044592857361, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.022633835673332214, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020200790837407112, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.019782625138759613, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01491706445813179, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01595778577029705, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.013878321275115013, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012531750835478306, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.20426876842975616, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.19040243327617645, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.18583713471889496, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1670016199350357, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09577319025993347, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.09066003561019897, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10749536752700806, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09904806315898895, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09726497530937195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08529669046401978, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08043240755796432, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.054878346621990204, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.047487854957580566, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04601535573601723, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.045674245804548264, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.027513204142451286, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.023929821327328682, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02376667968928814, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.021844813600182533, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.021626532077789307, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014740890823304653, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01485088188201189, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.014144307002425194, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010276658460497856, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2567921280860901, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2400372177362442, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2348223179578781, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.2113826423883438, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12063130736351013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11458129435777664, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13465668261051178, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12418589740991592, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12241373211145401, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1077127456665039, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.1015603318810463, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0685892403125763, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.059427835047245026, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.057841405272483826, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05746212974190712, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.034347131848335266, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02972492016851902, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02954902872443199, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02710237167775631, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026864394545555115, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018177496269345284, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01789924129843712, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017509059980511665, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011792340315878391, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.220456063747406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1968667209148407, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18825940787792206, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.165904700756073, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10047221928834915, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09178900718688965, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1180458515882492, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10725000500679016, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10333143919706345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08583743870258331, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08065532892942429, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.060039665549993515, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05139217525720596, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04835400730371475, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04761003702878952, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030203010886907578, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.025491753593087196, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02521909587085247, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022677693516016006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022203681990504265, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0163898766040802, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016573172062635422, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015302354469895363, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011881383135914803, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.0678648054599762, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.05832449719309807, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05305132269859314, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.04682612791657448, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.030495673418045044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0260174423456192, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.03981061652302742, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03619835525751114, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.03212413191795349, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02559935301542282, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.024478701874613762, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0201912559568882, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.017307959496974945, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.014783776365220547, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.014136782847344875, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010123331099748611, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.007843396626412868, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.007538937032222748, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.006895206868648529, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.006475826725363731, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005365354008972645, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0054167634807527065, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0044141970574855804, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003756717313081026, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06406810879707336, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05392655357718468, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04796307533979416, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.04215999320149422, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.02830769680440426, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.023390786722302437, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0381004698574543, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03472520783543587, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.030116328969597816, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.023462466895580292, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.022639183327555656, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.019295208156108856, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.016549021005630493, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.013665727339684963, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.012896580621600151, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009663157165050507, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.007132804952561855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0067680515348911285, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006180749274790287, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.005666375160217285, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005012092180550098, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0048949234187603, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.003906998783349991, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0031371666118502617, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.21822869777679443, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19210366904735565, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1828901320695877, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.16063569486141205, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10019132494926453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09005742520093918, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11737607419490814, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1077209860086441, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10374101251363754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08417721092700958, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07877589017152786, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05957990512251854, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05143119394779205, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.0479750819504261, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.047137074172496796, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029739700257778168, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02442052774131298, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.024043872952461243, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021100804209709167, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.020537685602903366, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015280804596841335, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014738506637513638, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013784699141979218, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00894668698310852, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.21767142415046692, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19480498135089874, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18667833507061005, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1614585816860199, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10077903419733047, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09176181256771088, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11711665987968445, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10703684389591217, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10319381207227707, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08455248922109604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07797829806804657, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06012822687625885, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0515451654791832, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04868471249938011, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04798545315861702, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.030279379338026047, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02570321224629879, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.025370577350258827, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022534441202878952, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.022088240832090378, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016420112922787666, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016713056713342667, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015250804834067822, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012032192200422287, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.20093587040901184, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.18747079372406006, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.18305633962154388, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.16458745300769806, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.0942423939704895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08924496918916702, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10567836463451385, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0974985882639885, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09569127857685089, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0839863047003746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07922372221946716, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05381370335817337, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04665176942944527, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04519587755203247, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.044841088354587555, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02692548930644989, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.023298000916838646, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.023140307515859604, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.021226949989795685, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.021006399765610695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014201732352375984, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014174871146678925, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.013610819354653358, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009458303451538086, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.25666892528533936, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24028541147708893, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23510636389255524, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21187376976013184, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12073830515146255, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.114763043820858, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13468465209007263, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12431541085243225, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12244067341089249, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10797642171382904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10196181386709213, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06864192336797714, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05942739546298981, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05781155824661255, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05743778869509697, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03429108113050461, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02961421199142933, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02944270521402359, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027011044323444366, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.0267647597938776, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017970066517591476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.0176787581294775, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017312468960881233, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01143528800457716, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2239081710577011, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20213429629802704, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19394336640834808, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.171790212392807, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.1026022806763649, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09438460320234299, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1199180856347084, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10924727469682693, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10522416979074478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08863573521375656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08357775956392288, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06112802401185036, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.052312422543764114, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04931004345417023, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04858233407139778, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03072669729590416, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02584480121731758, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02557441033422947, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02313164994120598, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022668713703751564, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01660056971013546, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.0165906623005867, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015534259378910065, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011689177714288235, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.0640607625246048, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.05465925112366676, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.04875622317194939, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.04308336228132248, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.02866896614432335, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.023843394592404366, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.03905715420842171, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.035270337015390396, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.03033614158630371, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.024030689150094986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.02319507859647274, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.019850512966513634, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.016871457919478416, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.013911967165768147, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.013128358870744705, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.009944344870746136, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.007403594441711903, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.007027579937130213, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.006517740432173014, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.006008001510053873, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0052359155379235744, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005243778228759766, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.004091125447303057, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0035971119068562984, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.061307020485401154, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0511954091489315, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04465645179152489, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.039449095726013184, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.026979098096489906, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.02171090990304947, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03773412108421326, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0341796875, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.028848931193351746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02239467389881611, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.021751686930656433, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01916407234966755, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.016327768564224243, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.013060622848570347, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.012180152349174023, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009564108215272427, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.006884485483169556, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.006461380049586296, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.005991403013467789, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.005407179705798626, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004992419388145208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004890658892691135, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0037504397332668304, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003196120960637927, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.1990065574645996, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.175347700715065, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16568295657634735, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.14583703875541687, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09130190312862396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08161689341068268, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1093735620379448, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10024634003639221, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09459245204925537, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07694811373949051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07229960709810257, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05553434416651726, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04778505861759186, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04377507418394089, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04278473183512688, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02769162878394127, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02240234799683094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02193976752460003, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019458061084151268, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.018782839179039, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014239305630326271, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013851226307451725, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.012564071454107761, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008564845658838749, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20604567229747772, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18138472735881805, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17234070599079132, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1493854820728302, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0948631763458252, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08482269942760468, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1126212477684021, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10214198380708694, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09788691997528076, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07890380918979645, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07301410287618637, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05785181745886803, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04929759353399277, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.045973584055900574, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04514525830745697, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029060568660497665, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02446848154067993, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.024096177890896797, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0213989969342947, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.020881151780486107, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01568884588778019, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016272379085421562, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014326533302664757, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011941639706492424, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.19215935468673706, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17953479290008545, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17526398599147797, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.15787579119205475, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09019896388053894, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08545421808958054, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10139752924442291, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09352394193410873, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09158821403980255, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.080614373087883, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0761069506406784, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.051730308681726456, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04480559378862381, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04330800846219063, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.042957138270139694, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.025888636708259583, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.022428737953305244, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.022267302498221397, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020505525171756744, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.020284608006477356, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013722153380513191, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013813982717692852, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.0131182000041008, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009387639351189137, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2512018382549286, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23531626164913177, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23016692698001862, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20750218629837036, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11818768829107285, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11230341345071793, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13215288519859314, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12199469655752182, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11990935355424881, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10582876950502396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09993904829025269, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06732939183712006, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.058350879698991776, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.056654565036296844, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05626261979341507, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03366081416606903, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.029080791398882866, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02889198623597622, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02655431628227234, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026304177939891815, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01765618845820427, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017480570822954178, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01696516014635563, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011423086747527122, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22689901292324066, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20633742213249207, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1988981068134308, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1771392524242401, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10438820719718933, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09684228152036667, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12147435545921326, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1103849783539772, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10684682428836823, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09110397100448608, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.0862639769911766, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06186521053314209, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05290740355849266, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.050210680812597275, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.049566250294446945, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03104746714234352, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026393255218863487, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02614405006170273, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023810964077711105, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023403877392411232, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016710469499230385, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01696200482547283, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015769656747579575, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012092279270291328, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.07058891654014587, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06231483072042465, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0558323934674263, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.04926640912890434, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.032067205756902695, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.027103550732135773, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.044093869626522064, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03985423222184181, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.03342333436012268, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.027560357004404068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.026610543951392174, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.022484682500362396, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.019151266664266586, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.015600333921611309, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.014646174386143684, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.011276153847575188, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.00834941677749157, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.007865752093493938, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0074910433031618595, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.006892383098602295, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005950332153588533, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006006445735692978, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.004610849544405937, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004175604786723852, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06416056305170059, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.055855754762887955, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04870617762207985, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.042840391397476196, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.02870963141322136, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.023418471217155457, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.040762223303318024, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03726019337773323, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.030108142644166946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02449532225728035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.02374706044793129, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.020689520984888077, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0177784264087677, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.013921290636062622, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.012854062020778656, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.010343882255256176, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.007369615137577057, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.006817258894443512, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0065522873774170876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.005872024688869715, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005388139747083187, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005336528178304434, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.003972351551055908, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003505392000079155, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.21032549440860748, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18846647441387177, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1802111268043518, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.15840712189674377, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09719694405794144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0883723720908165, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1143578514456749, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10445982217788696, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10005255788564682, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08282189071178436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07751209288835526, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.058263473212718964, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04989976808428764, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04657196253538132, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04578189179301262, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029053300619125366, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.023813318461179733, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.023428594693541527, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020862402394413948, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.020326899364590645, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014968053437769413, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014513892121613026, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013490078039467335, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009047563187777996, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20650742948055267, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1848054826259613, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17628566920757294, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15230408310890198, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0956873670220375, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08650178462266922, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1150512620806694, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10304266214370728, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09814409166574478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08074052631855011, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07503896951675415, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05925438553094864, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05007854476571083, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04678504914045334, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04599136486649513, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02997427061200142, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02552846446633339, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.025161614641547203, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022769013419747353, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.022268936038017273, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016785843297839165, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017694657668471336, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01556080300360918, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01374424435198307, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.18511880934238434, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17340964078903198, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16955919563770294, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.15304438769817352, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08716355264186859, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08276551961898804, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09757047891616821, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08992341160774231, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08836781978607178, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.078007772564888, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07377452403306961, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.049905672669410706, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04316117987036705, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04190007597208023, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.041604090481996536, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024990715086460114, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021824385970830917, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021689191460609436, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020009109750390053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.019822044298052788, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013371415436267853, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01357797160744667, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012866971082985401, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009468216449022293, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.25166887044906616, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23623493313789368, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23133079707622528, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20893089473247528, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11877202242612839, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11306880414485931, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1327078640460968, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12208554893732071, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12038824707269669, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1065032109618187, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10073071718215942, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06786930561065674, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.058554332703351974, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05704466253519058, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05668545886874199, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03399743512272835, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.029526324942708015, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.029371237382292747, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02705872431397438, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026837486773729324, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018172612413764, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018088998273015022, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017553023993968964, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012327134609222412, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.21882264316082, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19854195415973663, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19109366834163666, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1703658401966095, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10056177526712418, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.0930645689368248, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11712993681430817, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10657177865505219, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10301310569047928, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.087583988904953, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08303246647119522, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05969599261879921, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05104193091392517, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.048345692455768585, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.047685228288173676, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029952876269817352, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.025336209684610367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.025096071884036064, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022805990651249886, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022398406639695168, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016137225553393364, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01621735282242298, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015202696435153484, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011450045742094517, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.0691155269742012, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06064419075846672, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.055903058499097824, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.04894397407770157, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.03140527382493019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.027317339554429054, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.039887793362140656, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03654395788908005, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.032683517783880234, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.026492655277252197, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.02508251927793026, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.020251933485269547, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.01744946651160717, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.015151155181229115, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.014562798663973808, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010139787569642067, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.00793787557631731, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.007649464998394251, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0069715953432023525, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.0065899319015443325, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00531613826751709, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005321819800883532, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0044431076385080814, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003574896603822708, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06264452636241913, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.053710706532001495, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04869312793016434, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.04237726330757141, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.027963943779468536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.023644985631108284, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03655276075005531, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03334158658981323, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.02939520962536335, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02322898805141449, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0220949724316597, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.018569806590676308, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01593693159520626, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.013482061214745045, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01284573320299387, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.00928813498467207, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.007051664870232344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0067406948655843735, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006124598905444145, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.005698035005480051, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004833201412111521, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004788793623447418, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0038881090003997087, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003145880065858364, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.19789665937423706, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1749677211046219, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16641153395175934, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.14534318447113037, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0908576026558876, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08178525418043137, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10804267227649689, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09826453030109406, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09399299323558807, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07642340660095215, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07131816446781158, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05493615195155144, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.046928465366363525, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04357217252254486, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04275175929069519, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027470426633954048, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.022433489561080933, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.022074483335018158, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019521409645676613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.018973330035805702, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01422063633799553, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013953357934951782, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01269309502094984, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008985511027276516, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20577001571655273, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.187205508351326, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17981819808483124, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15731102228164673, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0955362394452095, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08804290741682053, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11225511133670807, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10203532129526138, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09783267974853516, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08214867860078812, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07689009606838226, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.057596344500780106, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.049087364226579666, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.046180978417396545, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.0454733781516552, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02883586660027504, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.024302661418914795, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.023983445018529892, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02167672850191593, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.021226167678833008, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01538254227489233, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015758128836750984, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0142836207523942, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011252795346081257, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17838558554649353, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16714052855968475, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1633693277835846, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1473636031150818, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.084012471139431, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07976633310317993, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09431228786706924, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08677013963460922, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08517853915691376, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07520353049039841, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07117349654436111, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04820563271641731, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04161243885755539, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04036497324705124, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04007614403963089, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024119490757584572, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020972944796085358, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020839182659983635, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019213085994124413, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.019031675532460213, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012854495085775852, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01299167238175869, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012349545955657959, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008972298353910446, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2461176961660385, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2308550626039505, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22593429684638977, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20383134484291077, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11614830046892166, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11040712147951126, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12983271479606628, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11953168362379074, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11774373799562454, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10392578691244125, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0983300730586052, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06627895683050156, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0572364404797554, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05570352077484131, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05534171313047409, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03313387930393219, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028673985973000526, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02850285917520523, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026195431128144264, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025967396795749664, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01743285544216633, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01732751727104187, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016786476597189903, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011498376727104187, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22131182253360748, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20178307592868805, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19457748532295227, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17374691367149353, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10184930264949799, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09456514567136765, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11838619410991669, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10775811970233917, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10418097674846649, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08912879228591919, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08430664986371994, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.060315489768981934, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05168139189481735, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.049039456993341446, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04840303212404251, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030334465205669403, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.025831809267401695, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02558664046227932, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023365933448076248, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022964321076869965, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016492707654833794, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016688615083694458, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.0155820082873106, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011971556581556797, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.08910305798053741, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.078480064868927, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07268496602773666, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.06375962495803833, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.040582235902547836, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.03552938625216484, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.051467422395944595, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0468125119805336, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04215244948863983, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03436363860964775, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03258935734629631, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02616771124303341, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.022448863834142685, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.019638437777757645, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.018931373953819275, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013137631118297577, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.010431504808366299, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.010086048394441605, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.009239557199180126, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.008787566795945168, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006962960120290518, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0071305446326732635, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.005887188017368317, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005026595667004585, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.07882285118103027, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06843134015798569, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06262272596359253, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05466156825423241, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.035481102764606476, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.030435431748628616, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.045937880873680115, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04177936166524887, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03710472211241722, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.029768522828817368, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.02836943045258522, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.023357272148132324, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.019977929070591927, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.017090391367673874, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.016357218846678734, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.011695531196892262, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.00892035011202097, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.008550561033189297, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0077935862354934216, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0073041124269366264, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006091906223446131, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005981590133160353, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.00497867725789547, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0039173006080091, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2019529640674591, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18193663656711578, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17492182552814484, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1530633419752121, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09356392920017242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08576379716396332, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10799380391836166, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09902622550725937, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09588927775621414, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07962820678949356, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07400796562433243, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05487731471657753, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04726940020918846, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04476409777998924, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.044171568006277084, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027388356626033783, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.022763218730688095, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02246767468750477, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01985841803252697, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0194600448012352, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01404790673404932, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013556619174778461, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.012935735285282135, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008230878040194511, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.21446581184864044, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1941494643688202, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18660257756710052, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1615847647190094, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09969910234212875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09149825572967529, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11615409702062607, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10569865256547928, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10195803642272949, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0840408131480217, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07858788967132568, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05960802361369133, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05076587200164795, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.047996848821640015, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.047325022518634796, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029789047315716743, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02497956156730652, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02465859428048134, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021836096420884132, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.021411346271634102, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01572103425860405, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01576652191579342, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014633448794484138, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010829092934727669, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17731119692325592, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16566158831119537, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1617959588766098, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14580819010734558, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08344192802906036, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07903989404439926, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09389957040548325, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08634775131940842, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08472657203674316, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07451940327882767, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07061102241277695, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04808998480439186, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.041502244770526886, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.040191490203142166, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.039890483021736145, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024107588455080986, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02107296884059906, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020931968465447426, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019311606884002686, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01911836676299572, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012994501739740372, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01333310641348362, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01247426401823759, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009515407495200634, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23879876732826233, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2232351303100586, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21821458637714386, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1966523826122284, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.1123802438378334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.1065739095211029, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12600786983966827, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11589046567678452, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11407271772623062, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10031693428754807, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09500443935394287, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06444498896598816, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05555428937077522, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05397858843207359, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.053598083555698395, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032244257628917694, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.0279450211673975, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.027775214985013008, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02551562339067459, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02528107538819313, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017146902158856392, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017155444249510765, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01648511365056038, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011695909313857555, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22360506653785706, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20417262613773346, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19683587551116943, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.175452321767807, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10309110581874847, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.0957108587026596, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12001800537109375, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10925670713186264, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10542978346347809, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09018214792013168, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08506227284669876, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06101464852690697, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05229475721716881, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04950812831521034, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.048850081861019135, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030596140772104263, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02588215284049511, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.025618143379688263, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023362403735518456, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022933706641197205, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01646384410560131, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01647312566637993, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015499681234359741, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011503400281071663, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.07730135321617126, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06861110031604767, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.06401927024126053, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.05564160272479057, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.03538643941283226, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.03130142018198967, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.043949201703071594, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04013776034116745, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.03652094677090645, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.029819384217262268, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.028047174215316772, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.022326357662677765, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.019155366346240044, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.01704401709139347, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.016512908041477203, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.011173314414918423, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.00889978464692831, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.008632585406303406, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007802382577210665, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.007452079560607672, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0058660730719566345, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005846804939210415, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.005038703791797161, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003924648277461529, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.07226452976465225, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06337123364210129, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05878634750843048, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05075661838054657, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03266598656773567, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.02861178107559681, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04113810136914253, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03741159662604332, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.033930424600839615, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02730737440288067, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.025649093091487885, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0208132341504097, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.017854653298854828, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.015705222263932228, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.015168579295277596, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01041287649422884, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.008135061711072922, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.007869604974985123, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007071035914123058, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.006705517414957285, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005390993785113096, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005304029211401939, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.004548318218439817, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0034339018166065216, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.189252108335495, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1694127768278122, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16225700080394745, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1407938450574875, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.08768179267644882, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0797121450304985, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10259972512722015, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09325975924730301, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.08998018503189087, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07374297827482224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.06847531348466873, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0521862730383873, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04456280171871185, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04200167953968048, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.041376013308763504, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.026073023676872253, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02149876579642296, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.021200256422162056, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018649255856871605, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.018224988132715225, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013455560430884361, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01308076549321413, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.012248924933373928, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008262157440185547, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20203925669193268, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1826143115758896, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17492122948169708, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15353961288928986, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09335009008646011, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08532552421092987, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1104763075709343, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10030290484428406, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09586994349956512, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0800187885761261, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07578163594007492, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05689898505806923, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04834501072764397, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.0451795794069767, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.0444183312356472, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.028558656573295593, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.0239496361464262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.023598842322826385, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021354271098971367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.020872117951512337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01547370757907629, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015791766345500946, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014286775141954422, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011473551392555237, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17294113337993622, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16074956953525543, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15651266276836395, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1405915468931198, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08116263896226883, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07652445882558823, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09200085699558258, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08439956605434418, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08255597949028015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07203374803066254, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06814655661582947, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.047101572155952454, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04055360332131386, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.039095256477594376, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03875862807035446, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02363847754895687, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02052280865609646, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020368093624711037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018726157024502754, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.018514851108193398, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012777402065694332, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013044721446931362, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012189937755465508, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009328574873507023, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2291836440563202, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2132311463356018, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20788347721099854, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.18663713335990906, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10758797079324722, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10158257931470871, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12126976996660233, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11146469414234161, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.10937771201133728, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09542948752641678, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0901680588722229, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0619560107588768, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05339083448052406, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05165499448776245, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05124164745211601, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03099323995411396, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.026727324351668358, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.026537522673606873, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02427973598241806, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.024018555879592896, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016409989446401596, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01641855761408806, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015676040202379227, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01113896630704403, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23287688195705414, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2126849740743637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20530810952186584, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18286387622356415, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10775326192378998, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10020114481449127, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12510976195335388, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11363277584314346, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11012694984674454, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09421702474355698, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08903686702251434, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06398025155067444, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.054577216506004333, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05188451334834099, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.051251161843538284, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03214545547962189, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02729383297264576, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027050340548157692, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024666303768754005, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024261943995952606, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017438383772969246, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017565881833434105, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01651502028107643, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012570399791002274, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.08981296420097351, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07945431768894196, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.074099101126194, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.06437059491872787, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.041264861822128296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.036397673189640045, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05113008990883827, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04668482765555382, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.042583972215652466, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03463399410247803, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03252527862787247, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.026015641167759895, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.022384116426110268, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.019956719130277634, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.01934872381389141, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013033546507358551, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.010553021915256977, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.010250968858599663, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.00927114300429821, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.008876525796949863, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006899710278958082, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007096196990460157, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0059640672989189625, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00498809851706028, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.07805461436510086, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06865237653255463, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06312691420316696, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05456852167844772, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.035389162600040436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.030724631622433662, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04532425105571747, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04124649241566658, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03667592629790306, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.029614387080073357, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.02793373540043831, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.022975871339440346, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01967964693903923, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.017068330198526382, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.016412794589996338, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01149218250066042, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.008954349905252457, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.00862752553075552, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007813332602381706, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.007386517710983753, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006040631793439388, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006014214362949133, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005062204785645008, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004061834886670113, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.19961042702198029, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17660541832447052, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16833916306495667, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.14525721967220306, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09181346744298935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08271550387144089, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10879362374544144, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09827765822410583, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09459634125232697, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07646556943655014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07081640511751175, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05544276908040047, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.046915095299482346, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04395373910665512, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.043245114386081696, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027756990864872932, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02251787669956684, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.022189904004335403, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019385861232876778, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.018894970417022705, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014333385042846203, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013777525164186954, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.012831572443246841, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008735796436667442, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2068634182214737, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1881418377161026, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1743212193250656, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15250174701213837, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09653005003929138, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08540575206279755, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1268123984336853, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1136607751250267, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09905886650085449, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08368156850337982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07939007878303528, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06571078300476074, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05497307330369949, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04712365195155144, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04508167505264282, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032977573573589325, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.025388678535819054, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.024353712797164917, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02294320985674858, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02169644646346569, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017886647954583168, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01795906573534012, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015140185132622719, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013045377098023891, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17543837428092957, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.162674680352211, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1581297516822815, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14198127388954163, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08238416910171509, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.0774427205324173, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09395347535610199, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08596694469451904, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08386443555355072, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07293327152729034, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06905556470155716, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.048192668706178665, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04142540320754051, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.039808277040719986, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03942031413316727, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024234391748905182, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021076377481222153, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020904770120978355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01924036256968975, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.019006120041012764, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013256324455142021, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01366385631263256, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01261316891759634, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010029315017163754, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.22885091602802277, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.212577223777771, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20704670250415802, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1857248991727829, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10738953948020935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10116197168827057, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12154887616634369, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11152967810630798, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.10924369841814041, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09503036737442017, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08972864598035812, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06195441633462906, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05339762941002846, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05151596665382385, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05107216164469719, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03101535141468048, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02658412791788578, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02638285979628563, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024095917120575905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.023812979459762573, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016351599246263504, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016248183324933052, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015555593185126781, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010865756310522556, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2341059148311615, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21377237141132355, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.206282839179039, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1834448277950287, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10860683023929596, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10099707543849945, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12641949951648712, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11465277522802353, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11103969812393188, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09478159993886948, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08952641487121582, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06466670334339142, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.055120810866355896, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05235465615987778, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05167976766824722, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03253210335969925, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027578987181186676, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02732447162270546, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024889612570405006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024469753727316856, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017593568190932274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017802052199840546, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016594616696238518, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012775403447449207, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.09715305268764496, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08539910614490509, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07880707085132599, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.06852563470602036, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04448634013533592, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0387553945183754, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05738317221403122, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05142911896109581, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04611479118466377, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03730863705277443, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03547699749469757, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.029379496350884438, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.024750791490077972, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02157082036137581, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02076842077076435, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014783439226448536, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.011498776264488697, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011100245639681816, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01012887991964817, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.009610702283680439, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007910825312137604, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007925624959170818, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.006630826275795698, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005640725139528513, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08578880876302719, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07487978786230087, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06866021454334259, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05947569012641907, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03887925297021866, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03357113152742386, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05033872649073601, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.045328233391046524, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04041195660829544, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03243082016706467, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.030699249356985092, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.025567293167114258, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.021756382659077644, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.018810974434018135, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.018061218783259392, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012843226082623005, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.009933165274560452, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009568150155246258, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008683864027261734, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008196189999580383, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006792915984988213, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006776769179850817, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005646056495606899, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004683437291532755, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.20098572969436646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1793464571237564, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17118293046951294, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.14899326860904694, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09320465475320816, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08446937054395676, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11113560944795609, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09997668117284775, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.0958901047706604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0784226655960083, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07312232255935669, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.056663062423467636, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04785233736038208, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.044704731553792953, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.043937798589468, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02831083908677101, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.022877156734466553, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.022527718916535378, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019846690818667412, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.019329894334077835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014625810086727142, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014005187898874283, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013132983818650246, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00884461123496294, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.22275157272815704, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19938285648822784, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19090715050697327, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.16635727882385254, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10321079194545746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09386981278657913, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12205725163221359, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11040586233139038, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.1062731072306633, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08768437057733536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08164525777101517, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06292253732681274, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05334939435124397, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.050076670944690704, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.049262065440416336, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031680766493082047, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.026591958478093147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.026239901781082153, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023573558777570724, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.023069383576512337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01722208596765995, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01754012703895569, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015989629551768303, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012828470207750797, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17753681540489197, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16419512033462524, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15953458845615387, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14315952360630035, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08349507302045822, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07837503403425217, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09522802382707596, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08703580498695374, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08506175875663757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07372992485761642, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0697949081659317, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0489283911883831, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04206782951951027, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04049460217356682, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04012366384267807, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02463913895189762, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021656757220625877, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021491168066859245, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019786247983574867, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01955971121788025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013619913719594479, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01430120412260294, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.013002509251236916, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010793305933475494, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23470452427864075, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21738477051258087, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21172021329402924, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.18979111313819885, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11023224145174026, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10370729863643646, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12443455308675766, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11427059024572372, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1121986135840416, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09728677570819855, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09173539280891418, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06370889395475388, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.054822828620672226, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.052995599806308746, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05256131663918495, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03187590837478638, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02756093628704548, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.027370724827051163, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024977732449769974, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.024702779948711395, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016990739852190018, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01714368909597397, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016237495467066765, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011864820495247841, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23485197126865387, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21377038955688477, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.2052312046289444, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18258275091648102, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10867457836866379, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10027255117893219, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12796176970005035, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11624100059270859, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11122553795576096, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09464879333972931, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08956869691610336, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06524835526943207, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05580282583832741, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05243517830967903, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05161484703421593, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03286735340952873, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027713999152183533, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02738061361014843, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025001810863614082, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024481691420078278, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017863204702734947, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018100788816809654, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016691409051418304, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013048271648585796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.09396975487470627, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08392154425382614, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07925210148096085, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.06902283430099487, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.043343983590602875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.03889898955821991, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.052451204508543015, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04789295420050621, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04459865018725395, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.036594998091459274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03432600200176239, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.026682918891310692, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.022890420630574226, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.020844779908657074, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.020350292325019836, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013346112333238125, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.010855739936232567, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.010605488903820515, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.009512039832770824, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.009182061068713665, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0070000761188566685, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006991868373006582, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.006176850758492947, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0047008744440972805, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.0799255520105362, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07129500061273575, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06665036082267761, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05802135914564133, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03666353225708008, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03252578526735306, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04553423449397087, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04142383858561516, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.037722691893577576, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.030972851440310478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.029152968898415565, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.023075208067893982, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.019781483337283134, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.01764029823243618, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.017105719074606895, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01154383271932602, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.009181073866784573, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0089074382558465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008049440570175648, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.007698097266256809, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006028869189321995, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005983538925647736, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005207897163927555, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003962153103202581, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.21270257234573364, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1901855170726776, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18239402770996094, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1583280861377716, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0983302965760231, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08955512940883636, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11502860486507416, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10426308959722519, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10090483725070953, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08268465101718903, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07684684544801712, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05869773402810097, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04974398389458656, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04704693332314491, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04637788608670235, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02925855480134487, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.023984163999557495, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.023663105443120003, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020758843049407005, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.020302746444940567, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015027782879769802, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014396249316632748, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013633130118250847, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008872074075043201, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23550382256507874, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2145477831363678, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.2065887451171875, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.18379716575145721, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.1095576211810112, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.10105866938829422, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12760934233665466, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11643887311220169, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11197017878293991, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09477290511131287, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.09021754562854767, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0656350776553154, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.056147851049900055, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.053007449954748154, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.052260082215070724, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03292495384812355, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.028066696599125862, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.027703074738383293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.025195756927132607, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.024723412469029427, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017721008509397507, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01837899163365364, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01651874929666519, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013369257561862469, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.18363343179225922, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1699117124080658, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16502133011817932, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14816728234291077, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08641481399536133, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08105594664812088, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09851227700710297, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09020079672336578, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08803214877843857, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07630668580532074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07224001735448837, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05057576671242714, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.043535955250263214, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.041821133345365524, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04141022264957428, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.025404799729585648, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.022231873124837875, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.022047873586416245, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020282980054616928, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.020028389990329742, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013841996900737286, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01453828439116478, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.013161084614694118, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010784115642309189, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24147915840148926, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2238529771566391, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21799565851688385, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19544926285743713, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11354124546051025, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10682559758424759, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12840281426906586, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11778019368648529, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1155599057674408, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10019879043102264, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09450459480285645, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0656755194067955, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05650630220770836, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.0545889250934124, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.0541362427175045, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03288539499044418, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028328251093626022, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028126342222094536, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02565816603600979, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02536972612142563, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017530858516693115, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017543865367770195, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01673092320561409, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012043127790093422, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.24687746167182922, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22386179864406586, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.21541069447994232, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1911175549030304, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.11425571888685226, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10550329089164734, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13356681168079376, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1211434081196785, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11711462587118149, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09896250069141388, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.09358968585729599, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06836560368537903, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.058142345398664474, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05501197651028633, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05426877364516258, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.034382693469524384, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.028858620673418045, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02857285737991333, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025865942239761353, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02538750134408474, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018584884703159332, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018503041937947273, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01748696155846119, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013068282045423985, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.1012606993317604, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09063403308391571, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08489318937063217, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07434715330600739, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04675355181097984, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04166419804096222, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.058033380657434464, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0528259240090847, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04812027886509895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03979805111885071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.037550777196884155, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.029525399208068848, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02531798928976059, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.022573305293917656, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02188127301633358, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014786897227168083, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.011866290122270584, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011523991823196411, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010501307435333729, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010058358311653137, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007794767152518034, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007885043509304523, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.006733269430696964, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005439877975732088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08260630816221237, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07415365427732468, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06811942160129547, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05976027250289917, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03782864660024643, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03307710215449333, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.049304574728012085, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.044744353741407394, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03893322870135307, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.032418735325336456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.030840441584587097, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.024898996576666832, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.021349862217903137, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.018250830471515656, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.017448123544454575, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012460418976843357, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.009541328996419907, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009133150801062584, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008472226560115814, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.00796218030154705, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006523756310343742, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0064339893870055676, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005393897648900747, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004257240332663059, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.22405266761779785, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2010050266981125, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19256632030010223, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1677653044462204, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10384178906679153, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09475373476743698, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12334273755550385, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11078497767448425, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10664699226617813, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08783884346485138, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08204057067632675, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0629723072052002, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0529911145567894, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04982578754425049, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04902924597263336, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.031513821333646774, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02556302770972252, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025186579674482346, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02232925221323967, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02180180884897709, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016281448304653168, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015700852498412132, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014637810178101063, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010094346478581429, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.24845221638679504, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.22099769115447998, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.2102019190788269, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.17793405055999756, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.11524109542369843, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.10441948473453522, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13816890120506287, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12502573430538177, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11898040771484375, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09484917670488358, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08884866535663605, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.07101860642433167, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.06033441796898842, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.055869996547698975, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05474749580025673, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03559674322605133, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02963833138346672, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.0291457362473011, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02570304088294506, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.024982940405607224, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.019105955958366394, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.019652996212244034, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01748253032565117, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014221671968698502, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17512373626232147, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16234833002090454, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15751910209655762, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1416473537683487, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08258399367332458, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07743815332651138, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09478088468313217, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08675966411828995, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08413971960544586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07324937731027603, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06939949095249176, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.048713162541389465, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04195103794336319, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.040046729147434235, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03959687054157257, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024473844096064568, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021395957097411156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021193645894527435, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.0196033027023077, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01933583989739418, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013362388126552105, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01416017021983862, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01261943019926548, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010625606402754784, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2386370301246643, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22181352972984314, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.215997114777565, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19424192607402802, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11242380738258362, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10592083632946014, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12747833132743835, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11687415093183517, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11434562504291534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09973901510238647, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09423502534627914, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06523597985506058, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.056102454662323, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.054075583815574646, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05358915030956268, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03268880769610405, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02812221087515354, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.027904238551855087, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02558957412838936, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025284891948103905, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017468517646193504, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017511609941720963, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016626328229904175, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012090599164366722, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23533707857131958, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21307411789894104, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.204370379447937, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18142254650592804, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10881828516721725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10012310743331909, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1288265585899353, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1162075623869896, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11157510429620743, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09424737095832825, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08932245522737503, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06591920554637909, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.055986642837524414, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.052670903503894806, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05181878060102463, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03336533531546593, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.028062673285603523, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027706488966941833, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02528602071106434, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024730978533625603, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018366949632763863, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018624981865286827, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01696428284049034, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013684611767530441, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11527643352746964, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10391019284725189, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0978332906961441, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08611054718494415, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05336505174636841, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04795213043689728, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06617261469364166, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05975998193025589, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.054825931787490845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.045840244740247726, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04338635876774788, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03379154950380325, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028713645413517952, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02579774335026741, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.025067880749702454, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016986757516860962, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01361310575157404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.01324410643428564, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012139451690018177, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011675894260406494, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009015623480081558, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00906714703887701, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007822985760867596, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006343604065477848, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08989278972148895, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08095189183950424, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07405897229909897, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06523619592189789, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.041073478758335114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03580481559038162, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05456014350056648, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04920908063650131, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04228812828660011, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03552240505814552, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.034056685864925385, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02772993966937065, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.023516802117228508, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.01982058770954609, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.018867379054427147, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013881932012736797, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01040666550397873, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009905864484608173, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009313962422311306, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008708050474524498, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00727222440764308, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007121726870536804, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005865731276571751, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0047438982874155045, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.24114984273910522, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2207307666540146, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21290627121925354, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.18915605545043945, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11304584890604019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.1047787144780159, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13228879868984222, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11974357068538666, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11557959020137787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09835109859704971, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0926164835691452, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06753918528556824, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.057313960045576096, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.0542287714779377, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05347645282745361, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03371294215321541, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02768963947892189, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.027329538017511368, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02464432269334793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02414723113179207, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017394233494997025, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01669805310666561, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015952011570334435, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010446498170495033, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23244984447956085, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21013851463794708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20218819379806519, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1709982454776764, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.1078345850110054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09928902983665466, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12559331953525543, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11384113132953644, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.1102089211344719, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09069845080375671, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08132462948560715, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06452503055334091, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.054677095264196396, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05191469565033913, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.0512758307158947, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032285988330841064, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02710442803800106, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.026797659695148468, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023702464997768402, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.023270297795534134, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01722588762640953, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01718081906437874, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01615883596241474, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01195172592997551, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17531630396842957, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16301554441452026, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15848234295845032, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1427704393863678, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08261482417583466, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07781856507062912, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0943765789270401, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08657581359148026, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08409849554300308, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07362636923789978, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06983952224254608, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.048483580350875854, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0418681837618351, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04008553922176361, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03965959697961807, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02440491132438183, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021463537588715553, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02127676084637642, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01974293403327465, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01948649063706398, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013403291814029217, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.0142397191375494, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012717602774500847, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0107426717877388, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24434253573417664, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2281128168106079, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.222539022564888, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20049838721752167, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11498657613992691, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10883118957281113, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1297098845243454, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11925768107175827, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1168660894036293, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10252971202135086, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09697521477937698, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06618674099445343, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05710103362798691, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05518819019198418, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.054728876799345016, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03308748081326485, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028406886383891106, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028195785358548164, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025872502475976944, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025579378008842468, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017395803704857826, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017244692891836166, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016592537984251976, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0113963782787323, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23903606832027435, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2164183259010315, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.207886204123497, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18443602323532104, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.11014078557491302, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10149253159761429, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12898309528827667, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11726640909910202, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11291112005710602, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09536655247211456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.0898551344871521, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06574563682079315, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05624091997742653, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05303202196955681, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.0522615909576416, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.033078327775001526, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02783920243382454, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027540108188986778, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024948634207248688, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024453863501548767, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017962874844670296, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01790924370288849, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016854995861649513, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01265411265194416, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11323212832212448, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1028268039226532, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09763193130493164, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08620136976242065, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05243660882115364, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04768139868974686, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06310927122831345, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05774379521608353, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05369594693183899, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04527505114674568, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0427590012550354, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.032131291925907135, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.027593906968832016, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02522554248571396, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.024628711864352226, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016076551750302315, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013093636371195316, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012797078117728233, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01165985781699419, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011285620741546154, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008408463560044765, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008381396532058716, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0074566942639648914, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0055967592634260654, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.0917828157544136, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0834406167268753, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07743554562330246, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.0685868188738823, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.042064420878887177, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03746289014816284, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05370093137025833, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04911479726433754, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04319967329502106, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03668560832738876, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.034918710589408875, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.027101730927824974, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.023382490500807762, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02023417502641678, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01942562870681286, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013570181094110012, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010484579019248486, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01006567757576704, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009407544508576393, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.00889410637319088, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007053103763610125, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006908230017870665, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005895310081541538, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0044347019866108894, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.24784161150455475, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22858524322509766, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2214842289686203, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.19756971299648285, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11630053073167801, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10865923017263412, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13469864428043365, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12199120223522186, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.1185394898056984, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10200601816177368, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09614618122577667, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06883957982063293, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05831307917833328, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.055681683123111725, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.055062435567379, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03435099124908447, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.028334032744169235, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.028040306642651558, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025363877415657043, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.024949070066213608, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017652478069067, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01682388223707676, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.016333719715476036, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01034762803465128, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2418750524520874, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.22064925730228424, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.213567316532135, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.18884404003620148, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.11227258294820786, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.10407261550426483, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1292095184326172, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11774007976055145, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11459937691688538, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09713096916675568, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.09116708487272263, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06651878356933594, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.056932322680950165, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05440140143036842, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05379776284098625, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03368932008743286, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.029006604105234146, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.028731634840369225, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.026069317013025284, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.025688786059617996, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018674517050385475, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.019107693806290627, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.017705408856272697, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014233723282814026, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16799023747444153, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15642160177230835, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15188486874103546, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13705524802207947, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07916805148124695, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07450892776250839, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0909382775425911, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08327904343605042, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08058592677116394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07065899670124054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06706546247005463, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04666927456855774, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0402199886739254, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03837946429848671, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.037935562431812286, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02346399798989296, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020475788041949272, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020275508984923363, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018840862438082695, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01857251673936844, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012835784815251827, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01352971512824297, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012134443037211895, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010116620920598507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24040713906288147, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2248275876045227, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21937668323516846, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19777514040470123, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11319214105606079, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10718050599098206, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1279284805059433, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1175546795129776, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.114979587495327, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10110189765691757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09561887383460999, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06535302102565765, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05630595237016678, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05434330180287361, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05387771129608154, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03267696127295494, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028012845665216446, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02779192477464676, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025556843727827072, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025260774418711662, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01727045327425003, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017076745629310608, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01647585816681385, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01136862114071846, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23079745471477509, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20780737698078156, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19869479537010193, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17603151500225067, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10604474693536758, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09694576263427734, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12517490983009338, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11395978182554245, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10895688086748123, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0913320928812027, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08619791269302368, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06390544772148132, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05469333007931709, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05106320232152939, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.050167251378297806, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03223271295428276, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026810158044099808, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.026461854577064514, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023938173428177834, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023374592885375023, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01752457208931446, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017345335334539413, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016270918771624565, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012216472998261452, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.09455286711454391, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08601541072130203, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08092644810676575, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.0720570906996727, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04367230087518692, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0393771268427372, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05429455637931824, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.049555614590644836, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.044850531965494156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03817570209503174, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03637660667300224, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02761639840900898, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.023709841072559357, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.021055644378066063, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.020392924547195435, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01381752546876669, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.011028090491890907, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.01068834587931633, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.009946409612894058, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.009522400796413422, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007233642973005772, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007295497693121433, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0062128473073244095, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004947783425450325, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08165859431028366, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07375713437795639, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06784820556640625, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06042163074016571, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03717811405658722, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03269133344292641, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.048783548176288605, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04444902762770653, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03842776641249657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.032581981271505356, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03128844499588013, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.024637911468744278, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0211376640945673, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.017934074625372887, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01709722727537155, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012334032915532589, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.009380647912621498, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.008955612778663635, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008467031642794609, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.007944460026919842, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006431309040635824, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006351955235004425, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005245341453701258, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004189451690763235, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.21295951306819916, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19142648577690125, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18135181069374084, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.16180254518985748, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09761235862970352, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08832226693630219, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12076354026794434, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10839696228504181, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10100953280925751, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08492228388786316, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08091507852077484, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.061470262706279755, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.051757052540779114, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04679860919713974, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.045604873448610306, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03071528859436512, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.024043908342719078, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.023462502285838127, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02145683765411377, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.020668523386120796, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01579330675303936, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01509381365031004, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013570364564657211, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009511470794677734, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23076443374156952, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20445586740970612, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19475361704826355, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1682949960231781, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10654981434345245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09593059867620468, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12616245448589325, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1146925687789917, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10996659100055695, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08923856914043427, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08224216103553772, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06489194184541702, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.055398471653461456, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05165587738156319, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05078103020787239, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03261391073465347, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.027543578296899796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02713325060904026, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024233579635620117, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.023651644587516785, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01767635904252529, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018364612013101578, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01622561737895012, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013525879010558128, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17178162932395935, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15977194905281067, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15511520206928253, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14013318717479706, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08079696446657181, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07604954391717911, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0925130695104599, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08495357632637024, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0823233425617218, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07219050079584122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06851466745138168, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04736356809735298, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04096171259880066, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03909517824649811, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03865382820367813, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023800011724233627, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02075575478374958, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020554937422275543, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01908132992684841, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.018810780718922615, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012905148789286613, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013560417108237743, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01217828132212162, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009983309544622898, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24555739760398865, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22957514226436615, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22396975755691528, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.2021886557340622, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11565034836530685, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.1095312312245369, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13062527775764465, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12003980576992035, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11747311800718307, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10341367870569229, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09803604334592819, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06672526895999908, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.057548101991415024, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05557403340935707, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05511154606938362, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.033493414521217346, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028820818290114403, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02860487625002861, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026369035243988037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026076894253492355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01788497157394886, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01781405135989189, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017095083370804787, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012171028181910515, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2418120801448822, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21605604887008667, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20604172348976135, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18220308423042297, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.11101410537958145, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10095298290252686, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13201263546943665, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11962804943323135, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.1143956109881401, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09492809325456619, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08962079882621765, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0673675611615181, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05743613466620445, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.0535324402153492, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.052578575909137726, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03396626561880112, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02823699451982975, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027879714965820312, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025116849690675735, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024508211761713028, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018504932522773743, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01845935359597206, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01710433140397072, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01318158395588398, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11007308959960938, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10066653788089752, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09502458572387695, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08464008569717407, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0510697141289711, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04632573202252388, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06327244639396667, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05754890665411949, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.0523289293050766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04479978606104851, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04259713366627693, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.032282084226608276, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02763853594660759, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024706054478883743, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02397441677749157, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0162256620824337, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013070066459476948, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012695017270743847, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01184629462659359, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011390749365091324, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008619500324130058, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008764405734837055, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0074934461154043674, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0061739357188344, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09215971827507019, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0828557014465332, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0754622370004654, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06740497052669525, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04187721386551857, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03639661893248558, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.055794015526771545, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.050904180854558945, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.043358515948057175, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.036669518798589706, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03536473959684372, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02823764830827713, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02428545244038105, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020206104964017868, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019130192697048187, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014111742377281189, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010562093928456306, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009998517110943794, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009537563659250736, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008856235072016716, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007346590980887413, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0072355493903160095, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005850006360560656, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00469924183562398, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23185540735721588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2090783268213272, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19873231649398804, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.17842984199523926, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.107200026512146, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09741275012493134, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13129419088363647, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11778464168310165, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11074674129486084, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09346155822277069, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0892392173409462, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0672527626156807, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05633708834648132, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.051507920026779175, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05033816397190094, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03362014517188072, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02640816941857338, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025852950289845467, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023598507046699524, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022831464186310768, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017401497811079025, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01643468253314495, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015129029750823975, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010331235826015472, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.211564838886261, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18722139298915863, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17920416593551636, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15260879695415497, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09646022319793701, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0879431813955307, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11288890242576599, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10246239602565765, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09874226152896881, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07973051816225052, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07383956760168076, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05806078389286995, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04972689598798752, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04706188291311264, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.046438682824373245, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029392065480351448, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.025577858090400696, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.025290638208389282, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022455304861068726, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.022058608010411263, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016482362523674965, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01751887798309326, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015477541834115982, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013604027219116688, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1629805564880371, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15149573981761932, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14690043032169342, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1326228380203247, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.0765409767627716, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07190670073032379, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08803240954875946, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08088510483503342, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07801753282546997, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.068372443318367, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06485440582036972, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04502640664577484, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.038923103362321854, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.036984264850616455, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03651601821184158, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02258773148059845, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.019526995718479156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.019313108175992966, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01793028600513935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01764673739671707, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012165013700723648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012659498490393162, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011421537958085537, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009159895591437817, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23870277404785156, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22321932017803192, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21761702001094818, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19631752371788025, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11231260001659393, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10628601908683777, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1272670030593872, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11695268005132675, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11412432789802551, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1005113422870636, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09509014338254929, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06499486416578293, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05606123059988022, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05396859720349312, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.053467441350221634, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032581228762865067, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.027938833460211754, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.0277065671980381, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025564908981323242, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025246325880289078, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017358187586069107, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01722523756325245, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016519255936145782, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011654519475996494, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23284517228603363, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2069539576768875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19629798829555511, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17316575348377228, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10659575462341309, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09629082679748535, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12829641997814178, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11617252230644226, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11008699983358383, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09085755795240402, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08567165583372116, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06559329479932785, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05592374503612518, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05153578519821167, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05045122653245926, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03317772597074509, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027447735890746117, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027025893330574036, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02444707788527012, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.0237677451223135, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018238060176372528, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01836249604821205, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.0166949275881052, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01340953167527914, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11179127544164658, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10254280269145966, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09686291962862015, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08672941476106644, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05183468386530876, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04711110144853592, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06428837776184082, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05855199694633484, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05317074432969093, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04582861065864563, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.043730728328228, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.032739635556936264, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028042085468769073, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024991333484649658, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.024222027510404587, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016408218070864677, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013068313710391521, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012683226726949215, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01188017800450325, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011401906609535217, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00865067820996046, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008606615476310253, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007488644681870937, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0058326940052211285, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09182524681091309, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08349768817424774, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07585605978965759, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06801563501358032, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0418623611330986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0364454947412014, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0563199482858181, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05156078189611435, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.043213795870542526, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03710399568080902, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03591751307249069, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.028576504439115524, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.024520710110664368, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020180542021989822, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019049501046538353, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014301500283181667, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010557753033936024, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009939033538103104, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00960922334343195, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008868723176419735, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0074808732606470585, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007274872623383999, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005913338623940945, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004665361251682043, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2478194385766983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22578230500221252, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2163207232952118, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.19413742423057556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11509407311677933, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10578615218400955, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1382603496313095, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1245407909154892, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11841809749603271, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.101035937666893, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09598508477210999, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07066911458969116, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05958828702569008, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05519983544945717, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05411367863416672, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03528359532356262, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.028165297582745552, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02767913229763508, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025258952751755714, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.024563228711485863, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018107211217284203, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01719982922077179, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015985995531082153, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010599326342344284, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2258344441652298, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20217084884643555, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19404765963554382, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1668216735124588, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10430581122636795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09520960599184036, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12214623391628265, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11005397140979767, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10683305561542511, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08682805299758911, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08071260154247284, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06290590763092041, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05360712483525276, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05094949156045914, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.050319768488407135, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03186262026429176, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.027772029861807823, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.027479734271764755, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024479342624545097, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.024090098217129707, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017848147079348564, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.019055720418691635, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.016845984384417534, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014893701300024986, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16108110547065735, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14990384876728058, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14545758068561554, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13144654035568237, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.0755748450756073, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.0710982233285904, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08653847873210907, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0796586126089096, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07697884738445282, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06754980236291885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06408824026584625, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04418835788965225, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03828990459442139, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.036450307816267014, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.036009419709444046, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022178534418344498, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.01913844421505928, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.018933143466711044, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01756344921886921, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01729128323495388, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011882012709975243, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012253506109118462, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011181916110217571, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00871969573199749, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23763754963874817, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2222813367843628, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21673683822155, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19585244357585907, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11179046332836151, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10585036128759384, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12653088569641113, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11642027646303177, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11361539363861084, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10016338527202606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09496523439884186, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06453338265419006, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05579390004277229, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05372035875916481, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05322665348649025, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03236093744635582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.027857093140482903, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.027627168223261833, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025526583194732666, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02520640194416046, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017305757850408554, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017240555956959724, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016489166766405106, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011761639267206192, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22687837481498718, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2013227939605713, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.190190389752388, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16685253381729126, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10370843857526779, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.0930912047624588, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12571561336517334, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1139383390545845, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10716325044631958, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08802624046802521, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.0829458013176918, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06431064009666443, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.054974768310785294, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.050179820507764816, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04901256412267685, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.032608017325401306, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02677854150533676, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02628641575574875, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02379791811108589, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023023780435323715, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01805344969034195, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018061833456158638, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01640280708670616, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013162072747945786, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11174769699573517, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10211605578660965, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09513558447360992, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08554348349571228, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05158860608935356, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.046120088547468185, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06649007648229599, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06025129556655884, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.053054727613925934, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04571841284632683, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04397908225655556, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03388504311442375, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0288497656583786, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024951493367552757, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.023960337042808533, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016990091651678085, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013166327960789204, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.01265853364020586, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012025851756334305, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011402116157114506, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00898471288383007, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00896298699080944, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0074914079159498215, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006157314404845238, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09272628277540207, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08330178260803223, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07391117513179779, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06655077636241913, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04197942465543747, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03536941111087799, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05941883474588394, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.053817182779312134, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.043587539345026016, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.037084903568029404, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03624583035707474, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.030063267797231674, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02563951164484024, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02030874229967594, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01886216551065445, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014989500865340233, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010723181068897247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009951597079634666, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009767374023795128, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008850776590406895, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007869934663176537, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0076713296584784985, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005932793021202087, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004999825730919838, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.24075794219970703, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2152351588010788, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2040635645389557, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1829075664281845, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.1102103739976883, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09935563802719116, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13557620346546173, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12183483690023422, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11423152685165405, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09564615786075592, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0913631021976471, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06940238177776337, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.058164555579423904, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05288276821374893, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05155785009264946, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.034749239683151245, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.0270902831107378, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.026459896937012672, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02411019057035446, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02324208803474903, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017859922721982002, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01689651794731617, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015274908393621445, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010502409189939499, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.24323567748069763, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20849132537841797, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1966649889945984, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.16843771934509277, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.11132003366947174, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09765509516000748, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13182759284973145, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11989805847406387, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11537645757198334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08950826525688171, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08291106671094894, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06772777438163757, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05790357291698456, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05400889739394188, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05305027589201927, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03411576896905899, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.028878364711999893, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02844340167939663, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024710820987820625, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02409793622791767, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018641581758856773, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01933763176202774, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.017128214240074158, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014366132207214832, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16922970116138458, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1581164002418518, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15390002727508545, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13905474543571472, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07960924506187439, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07522760331630707, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09036866575479507, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08329179883003235, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08092986047267914, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0712634027004242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06750772893428802, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.046153724193573, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03999774530529976, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03833947330713272, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.037943035364151, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023114709183573723, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020051512867212296, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.019865013659000397, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018394647166132927, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.018150193616747856, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012356475926935673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012679574079811573, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011723567731678486, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008916891179978848, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.25086095929145813, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23525136709213257, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22979764640331268, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.2075852006673813, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11801891028881073, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11205220222473145, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13287167251110077, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12228527665138245, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11978885531425476, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1058482676744461, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10026658326387405, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06764578074216843, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05856766551733017, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.056633733212947845, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.0561734214425087, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.033959440886974335, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02918962761759758, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028975190594792366, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02671945095062256, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02643091045320034, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018145738169550896, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01776311732828617, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017391500994563103, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011824087239801884, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2291693389415741, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20355378091335297, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19302140176296234, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16963011026382446, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10464301705360413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09431031346321106, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.124956414103508, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11397331207990646, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10801846534013748, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08872951567173004, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08344950526952744, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06377210468053818, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05463777482509613, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05032818391919136, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.0492597259581089, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0321563184261322, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026345085352659225, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.025923077017068863, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02323765493929386, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02255159802734852, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017411701381206512, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017071282491087914, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015892164781689644, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011817269027233124, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10894070565700531, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09877091646194458, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09180149435997009, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08259885758161545, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.050150640308856964, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04459427669644356, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06447620689868927, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05862698704004288, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05174729973077774, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.044217273592948914, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.042535193264484406, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03280109912157059, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028024721890687943, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02421606332063675, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02323504351079464, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016423210501670837, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012719037011265755, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012219107709825039, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011552772484719753, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010934913530945778, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008618002757430077, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008585609495639801, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007156326435506344, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005786116234958172, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.0943751409649849, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08441128581762314, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07531294226646423, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06775697320699692, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.042745884507894516, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03614049777388573, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05981197580695152, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.054301731288433075, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.044527407735586166, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.037667978554964066, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.036793336272239685, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.030360007658600807, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.025923801586031914, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020698290318250656, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01930345594882965, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015181426890194416, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010937364771962166, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010211050510406494, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009944874793291092, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009052656590938568, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007923793978989124, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007800531107932329, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006027610041201115, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005132413934916258, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23736801743507385, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20973485708236694, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19642147421836853, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.17581123113632202, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10787738114595413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09563668072223663, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13620014488697052, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1222366914153099, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11254683881998062, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09303447604179382, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08897384256124496, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06988875567913055, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05835515260696411, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.051716819405555725, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05004201829433441, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.035005804151296616, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.026562003418803215, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025763338431715965, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023556487634778023, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022452181205153465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01802624762058258, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016901636496186256, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01489462424069643, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010477732867002487, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.25058087706565857, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2240896373987198, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.21516288816928864, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.18136383593082428, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.11559724807739258, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.10555557161569595, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1326684206724167, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12205904722213745, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11827317625284195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09494823962450027, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08694610744714737, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06799913942813873, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05859541893005371, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05560881644487381, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05489826947450638, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.034139178693294525, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02907831035554409, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02875477634370327, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024997344240546227, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.024535028263926506, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01828760839998722, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018481940031051636, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01710161752998829, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012959788553416729, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16972194612026215, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1588301807641983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15477728843688965, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13972516357898712, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.0799519345164299, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.0756981298327446, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09029948711395264, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08333675563335419, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0812196135520935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07160330563783646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06776595860719681, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04611651226878166, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04000251740217209, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.038487229496240616, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03812829777598381, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02310078777372837, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020074520260095596, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01990598998963833, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018415579572319984, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.018189016729593277, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01234456617385149, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012594389729201794, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011771566234529018, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008790192194283009, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2614101469516754, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2453160583972931, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.239889994263649, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21665942668914795, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12315870821475983, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11711719632148743, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13802096247673035, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12728740274906158, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12492972612380981, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11051113903522491, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.1045079231262207, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0704721063375473, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06096865236759186, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05912341922521591, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05868246778845787, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03526976704597473, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.03045276179909706, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.030252810567617416, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02788017876446247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.027605636045336723, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018733005970716476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018479639664292336, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01800265721976757, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012289660051465034, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23084014654159546, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20520946383476257, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19444629549980164, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17141160368919373, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10526919364929199, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09490039944648743, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1258649230003357, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1150021180510521, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10864713788032532, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08950526267290115, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08436249196529388, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0638660416007042, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0550110824406147, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05058523267507553, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.049495719373226166, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.032216254621744156, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026409929618239403, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02597738802433014, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023315127938985825, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022612852975726128, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017416756600141525, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017030037939548492, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015928102657198906, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011660969816148281, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11147601902484894, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10048612952232361, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09299404174089432, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08360052108764648, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05115680396556854, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04516442492604256, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0665188878774643, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0603136345744133, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05299936234951019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.044996313750743866, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04343458637595177, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03397168964147568, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02890927530825138, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024732369929552078, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02365298382937908, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017037611454725266, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01302504725754261, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012479369528591633, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011815754696726799, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011132798157632351, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009056816808879375, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008879413828253746, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007490464951843023, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0060056596994400024, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10108033567667007, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09034782648086548, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0804382860660553, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07239359617233276, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04592336341738701, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.038725271821022034, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0641566589474678, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0583484023809433, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04782426729798317, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.040363799780607224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03938225656747818, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03260837867856026, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027894601225852966, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02222253568470478, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.02071007899940014, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016259055584669113, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011744758114218712, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010953151620924473, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01067025400698185, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.00970240868628025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008501437492668629, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008389054797589779, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006469243206083775, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005518340039998293, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2322850376367569, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2036956399679184, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18949079513549805, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.16934284567832947, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10507857799530029, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09219677001237869, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13468235731124878, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12069160491228104, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10994899272918701, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09022693336009979, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08656906336545944, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06880255043506622, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05759001150727272, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05042608454823494, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.048616208136081696, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.034399472177028656, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.025883285328745842, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025024713948369026, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022869406268000603, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02166873961687088, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017645955085754395, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016618072986602783, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014378370717167854, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010251731611788273, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23340699076652527, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.201211079955101, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18919001519680023, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.14922896027565002, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10719141364097595, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09432792663574219, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12794052064418793, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11669991165399551, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11131159216165543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08197597414255142, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07398759573698044, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06551608443260193, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.056240130215883255, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.051935937255620956, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.050884079188108444, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032847173511981964, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.027566829696297646, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.027117837220430374, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022722451016306877, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02201000414788723, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017727652564644814, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01830950379371643, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.016054371371865273, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01328368578106165, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.18418757617473602, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1722533106803894, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16790185868740082, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.15158604085445404, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08671881258487701, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08206260204315186, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09775162488222122, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09033061563968658, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08810167014598846, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07761628925800323, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07333091646432877, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04981488361954689, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.043285347521305084, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.0416557602584362, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04126875102519989, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02490292303264141, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02156391367316246, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02138587087392807, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019728578627109528, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01948717050254345, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013106768019497395, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013300422579050064, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012478144839406013, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009015166200697422, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2666207253932953, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24994248151779175, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.24438276886940002, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.2206498384475708, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.1255226582288742, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11930101364850998, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1403743326663971, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12973690032958984, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12737827003002167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11257345229387283, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10630577802658081, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.071465864777565, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.062064029276371, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.060167811810970306, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.059723421931266785, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.035697538405656815, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.03083745203912258, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.03062656708061695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02817920781672001, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.027890345081686974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018630102276802063, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018481232225894928, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01785840280354023, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011957895942032337, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2392767071723938, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21289926767349243, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20199070870876312, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17893455922603607, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10932300239801407, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09867586195468903, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13079915940761566, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11920606344938278, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11281375586986542, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09335476905107498, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08819039165973663, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06641173362731934, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05707821249961853, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.052596915513277054, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05150120332837105, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03348654508590698, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027523688971996307, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027084315195679665, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024395901709794998, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02368498221039772, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018211254850029945, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017811231315135956, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016670554876327515, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01231044065207243, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11163932085037231, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10114680230617523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09423626959323883, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08473335951566696, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05138453096151352, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04576012119650841, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06589025259017944, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05983511358499527, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05310133472084999, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04527689516544342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.043515950441360474, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.033544886857271194, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028598975390195847, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024773649871349335, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.023800380527973175, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016799114644527435, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012935302220284939, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012447330169379711, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011727005243301392, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011107501573860645, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008837123401463032, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008631404489278793, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007378046400845051, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005706119816750288, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09945126622915268, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0886220633983612, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0800168365240097, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07193832844495773, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0450296476483345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03862548992037773, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.061027027666568756, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05563238263130188, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.046964239329099655, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0394991859793663, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03824745863676071, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.030843833461403847, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026515090838074684, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.021754464134573936, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.020480912178754807, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015430256724357605, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011390458792448044, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01075532753020525, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010291953571140766, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009504313580691814, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00802066270262003, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007883585058152676, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006288829259574413, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0051208543591201305, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.24608907103538513, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22038771212100983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20880833268165588, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.18698348104953766, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11279966682195663, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10177579522132874, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13780826330184937, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12500543892383575, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11690637469291687, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09803866595029831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09331604838371277, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07030119746923447, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0596502311527729, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05409802868962288, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05273262411355972, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0351378358900547, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.027687177062034607, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.027054661884903908, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.024672266095876694, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.023773442953824997, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018065784126520157, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01727456972002983, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015557637438178062, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010715796612203121, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23534290492534637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19952502846717834, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1872437298297882, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1504630446434021, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.1069660484790802, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09249737858772278, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12690705060958862, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11595474183559418, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11129754781723022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08272451162338257, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07420212775468826, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06492456048727036, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05579738691449165, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05175263062119484, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.050743766129016876, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03254813700914383, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02729354053735733, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02684720978140831, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022599078714847565, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02191718854010105, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017556874081492424, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01788962259888649, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015958555042743683, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012742072343826294, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.18806077539920807, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17592841386795044, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17148646712303162, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.15475870668888092, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.0885704904794693, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08380842208862305, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09994670003652573, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09216117858886719, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08996351063251495, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07918179780244827, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07491172105073929, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.050926074385643005, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04419756308197975, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04257810115814209, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04219432920217514, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.025557566434144974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02208591438829899, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021910684183239937, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020204778760671616, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.019964158535003662, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01366393081843853, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013682438060641289, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01304465252906084, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009343470446765423, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.27220624685287476, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.25520968437194824, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.24953150749206543, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.22515861690044403, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12832780182361603, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.12187555432319641, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14358454942703247, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13255620002746582, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.13017068803310394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11490518599748611, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10844384133815765, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07317405194044113, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06342219561338425, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.06150310859084129, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.061048272997140884, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03658445179462433, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.03150777518749237, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.031296152621507645, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.028746608644723892, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.028455497696995735, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019212866201996803, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01884501986205578, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01844012923538685, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012183197773993015, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.24373655021190643, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21701818704605103, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20558786392211914, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18251021206378937, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.11131592094898224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10034153610467911, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13311141729354858, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12195637822151184, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11493988335132599, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09525039792060852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.09028742462396622, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06774792075157166, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.058373454958200455, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05350892245769501, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.052315495908260345, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03408534824848175, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027907509356737137, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027416717261075974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024738192558288574, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023961544036865234, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018247265368700027, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017976252362132072, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01653502881526947, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012217087671160698, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11904940009117126, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10819849371910095, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1008581668138504, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.09081161767244339, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.055039361119270325, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.049046725034713745, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07025649398565292, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0639699175953865, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.056700561195611954, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.048546988517045975, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0466989204287529, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0358397476375103, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030655913054943085, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02658892422914505, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02554677054286003, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01793590374290943, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013973291963338852, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013447440229356289, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012695214711129665, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.012050659395754337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009425092488527298, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009416528977453709, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007899822667241096, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006391560658812523, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10749021172523499, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09693123400211334, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08739257603883743, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07863394916057587, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04912206903100014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.04208862781524658, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.067047618329525, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06118723377585411, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.05089905485510826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04335059970617294, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.04218937084078789, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03413992375135422, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02925952896475792, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02375011146068573, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.022253096103668213, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017057374119758606, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.012431290000677109, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.011665252968668938, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011295106261968613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.01035404670983553, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008896222338080406, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008666926994919777, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006902877241373062, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005597029812633991, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2602056562900543, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.23536106944084167, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.22505417466163635, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.20159770548343658, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.12013006210327148, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.11002396047115326, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14425800740718842, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13069136440753937, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.12397436797618866, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10506925731897354, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09978693723678589, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07342605292797089, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0624157153069973, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.057630933821201324, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.056449469178915024, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03667762875556946, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02941952645778656, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.028870215639472008, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.026291778311133385, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.025523679330945015, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018837954849004745, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01803452894091606, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01661190763115883, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011108431965112686, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2293630689382553, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2003541737794876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18839608132839203, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1596321016550064, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10583258420228958, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09321698546409607, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12654335796833038, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11598812788724899, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.1088939979672432, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08577379584312439, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07806531339883804, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06463868916034698, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.055600956082344055, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05093419924378395, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04980067163705826, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032356590032577515, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02653210051357746, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.025965958833694458, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022545263171195984, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.021764719858765602, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01715695671737194, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01705550029873848, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015368195250630379, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011513866484165192, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.18304666876792908, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17112071812152863, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16684338450431824, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1504509449005127, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08629842847585678, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08166006207466125, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0972968190908432, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08980339020490646, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08766349405050278, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07710053026676178, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07285171747207642, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0496247373521328, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04304081201553345, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.0414675697684288, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04109533503651619, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02484418824315071, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021477092057466507, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021301936358213425, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.0196232870221138, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.019388390704989433, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013193966820836067, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01325136236846447, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01258657593280077, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008990805596113205, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.27565836906433105, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2583853304386139, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.25261208415031433, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.227885439991951, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.13000507652759552, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.1235009953379631, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14588183164596558, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13427889347076416, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1318831741809845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11633385717868805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10998649150133133, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07442523539066315, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0643484890460968, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.062423642724752426, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.06197293847799301, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03733423724770546, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.03216121718287468, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.03194991871714592, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.0293800700455904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.029097117483615875, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019936073571443558, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.019509615376591682, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01916845142841339, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012975404970347881, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2432844489812851, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21641826629638672, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20467814803123474, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1824696958065033, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.11103689670562744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09983674436807632, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13364596664905548, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12213827669620514, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11472369730472565, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09524569660425186, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.09044715762138367, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06775090098381042, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05847512185573578, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05339961498975754, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.052145760506391525, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03413508087396622, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02789800986647606, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027381805703043938, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0247937198728323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023984363302588463, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01843113638460636, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018077803775668144, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016724184155464172, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012349030002951622, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.12533243000507355, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11415334790945053, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10664521902799606, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.0959629938006401, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05801605060696602, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.051879461854696274, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07366746664047241, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0671568512916565, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.059712450951337814, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0512092188000679, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04916597530245781, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.037510767579078674, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.032166969031095505, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.027981236577033997, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.026932701468467712, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018775565549731255, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.014631936326622963, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.01409474853426218, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013279229402542114, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.012612898834049702, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00980198010802269, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009739573113620281, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.00823354721069336, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006483902223408222, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10834056884050369, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09809891879558563, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08864127099514008, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07970917969942093, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04956981912255287, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.04277588799595833, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0671442449092865, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.061584435403347015, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.05131177976727486, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04386645928025246, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.04250182583928108, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03387557342648506, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.029341936111450195, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.023955337703227997, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.022538524121046066, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016981307417154312, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01251173298805952, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01175450999289751, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01136572565883398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.010442214086651802, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008813697844743729, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008660382591187954, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006888656411319971, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005519889295101166, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2626214325428009, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.23834891617298126, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.22786292433738708, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.20399734377861023, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.12155438214540482, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.11139243841171265, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1465398669242859, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1322971135377884, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.1251872479915619, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10639487951993942, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.10116260498762131, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07463084161281586, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06316176801919937, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.058271374553442, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05706130340695381, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03724392130970955, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.029716679826378822, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.029169583693146706, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.026601674035191536, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02580570988357067, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019177133217453957, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018188226968050003, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.016912275925278664, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011138089001178741, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.24986112117767334, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21192000806331635, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19815577566623688, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.16705770790576935, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.11464554816484451, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09821084141731262, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13707445561885834, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12475699931383133, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11930416524410248, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0901598110795021, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08183597028255463, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06996940821409225, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05991789326071739, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.055303897708654404, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.054170385003089905, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.035157013684511185, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02889162302017212, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.028348108753561974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024032805114984512, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02323688380420208, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018787767738103867, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01860187202692032, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01686723902821541, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012751336209475994, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1828702986240387, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17083871364593506, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1665153056383133, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14999902248382568, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.0863124281167984, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08159890025854111, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09742239117622375, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08985304087400436, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08766435831785202, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07702361792325974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0727950856089592, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04980256408452988, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04310649633407593, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.041508786380290985, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04113338887691498, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024972302839159966, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021580934524536133, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021401211619377136, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01971307024359703, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01948264241218567, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01339336670935154, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013419861905276775, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012784245423972607, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009246445260941982, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.27678382396698, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.25927501916885376, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2533557415008545, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.22835922241210938, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.13071271777153015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.1240559071302414, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1460684835910797, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13503144681453705, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.13262885808944702, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1168326586484909, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.11018326878547668, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07437656819820404, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06462918221950531, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.062655009329319, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.06219794601202011, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.037161070853471756, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.03208734095096588, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.03187267854809761, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02922985330224037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02893965318799019, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01933305151760578, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.019189579412341118, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01852329634130001, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012368094176054, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.24161376059055328, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21448847651481628, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.2020624876022339, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18090400099754333, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.11022615432739258, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09859272092580795, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13329079747200012, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12234467267990112, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11403361707925797, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09475549310445786, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.0902712270617485, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06785810738801956, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05861643701791763, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05306793004274368, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.051690828055143356, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03402264788746834, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027796030044555664, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02721608616411686, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024761075153946877, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02388262376189232, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01810017041862011, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018187014386057854, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01618027500808239, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012490330263972282, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.12007162719964981, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10874760150909424, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10069365799427032, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.09050925076007843, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05537009984254837, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04896612837910652, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07163839042186737, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06535623967647552, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05719604715704918, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04876408725976944, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.047009460628032684, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.036492060869932175, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03129162639379501, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02674194984138012, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.025569772347807884, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0182795450091362, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01402096077799797, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013421434909105301, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012716202065348625, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.01197248324751854, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009556811302900314, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009484956040978432, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.00786807481199503, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0063057634979486465, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10349594056606293, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0929342731833458, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08254888653755188, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07439006865024567, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.047203175723552704, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03981185704469681, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06621333956718445, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06028927490115166, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04902824014425278, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04164113476872444, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.040590379387140274, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03361058980226517, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.028821250423789024, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.022870661690831184, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.021236246451735497, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016808653250336647, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01206290815025568, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.011216932907700539, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010977317579090595, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009958355687558651, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00883469544351101, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00861930102109909, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006728947628289461, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00562920980155468, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2519940137863159, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22460851073265076, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2114940583705902, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1891980767250061, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11529166996479034, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10318470001220703, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14409741759300232, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12958143651485443, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.119874507188797, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09987786412239075, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09556367993354797, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0736955776810646, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06187625601887703, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.055322062224149704, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05369304493069649, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03689737617969513, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.028356658294796944, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0275889839977026, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025229914113879204, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.024146711453795433, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018980590626597404, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017897548153996468, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01597900316119194, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011040225625038147, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23562918603420258, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2072681337594986, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19848957657814026, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1685493439435959, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10901129990816116, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09796132147312164, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12461183220148087, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11481024324893951, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11225154250860214, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08937230706214905, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08059319853782654, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06371432542800903, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0549999363720417, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.052367329597473145, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.051736801862716675, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031834520399570465, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.0270191989839077, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.026741618290543556, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023058557882905006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02263473905622959, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016703499481081963, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016653621569275856, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01557922549545765, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011079590767621994, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.18255189061164856, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17046870291233063, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16612961888313293, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14957138895988464, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08615099638700485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08138635754585266, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09710228443145752, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08973664790391922, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08755525946617126, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07680504024028778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07243988662958145, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.049500465393066406, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0430203378200531, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04139462858438492, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.041009560227394104, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024753421545028687, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02140980400145054, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021227572113275528, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01952384226024151, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.019281644374132156, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013010768219828606, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013171402737498283, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012378739193081856, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008877236396074295, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.27428582310676575, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.25684061646461487, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2509373128414154, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.22604431211948395, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.1296204775571823, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.12293290346860886, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14525292813777924, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.133985698223114, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.13156364858150482, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11575259268283844, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.1091306060552597, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07409022003412247, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06416299939155579, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.062184885144233704, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.061721768230199814, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03706740960478783, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.03192131221294403, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.031701669096946716, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02907377853989601, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.028778662905097008, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019520211964845657, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.019199980422854424, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01870819739997387, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012524889782071114, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2371426820755005, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20936042070388794, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1962297260761261, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1759357452392578, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10791362076997757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09580172598361969, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1323205828666687, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12104706466197968, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11199761927127838, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09262944757938385, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08857119083404541, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06730736047029495, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.058096759021282196, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05207465589046478, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05055518075823784, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03406193107366562, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027436167001724243, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.026800058782100677, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02446822077035904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02350343018770218, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018567077815532684, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.0182512030005455, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01654183492064476, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012713980861008167, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11910046637058258, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10814539343118668, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1014256626367569, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.09112553298473358, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.054986562579870224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04944378137588501, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06942107528448105, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06304670125246048, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05679722875356674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.048488058149814606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04650365933775902, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03539497032761574, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030200865119695663, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.026547688990831375, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02562793157994747, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01773308962583542, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013895335607230663, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013435792177915573, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012601356022059917, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.012017151340842247, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009326329454779625, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009225145913660526, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007899905554950237, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006201675161719322, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10428067296743393, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09483695030212402, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08801671117544174, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07902009785175323, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.048072636127471924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0426214225590229, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06190193071961403, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0565517321228981, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.049520790576934814, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.042406778782606125, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.040688350796699524, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03138303756713867, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027072235941886902, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02317746914923191, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.022189846262335777, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015740467235445976, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.012084206566214561, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.011574272066354752, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010963644832372665, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.010339977219700813, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008248318918049335, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008097659796476364, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006838819943368435, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005299603100866079, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23965148627758026, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2175177037715912, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20770718157291412, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.18614938855171204, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11101995408535004, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10136940330266953, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1326066255569458, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1209743544459343, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11429521441459656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09702446311712265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09206055849790573, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06747196614742279, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05780010297894478, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.053217560052871704, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05209919437766075, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03367473930120468, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.027149200439453125, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.026602521538734436, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.024261711165308952, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.023511497303843498, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017307402566075325, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016619078814983368, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015370728448033333, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01014639437198639, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.21794812381267548, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19176362454891205, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1824253648519516, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1549653559923172, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10014189034700394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08961237221956253, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11815081536769867, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1081581562757492, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10408283770084381, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08324403315782547, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07532443851232529, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06065221503376961, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0521501749753952, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.048493675887584686, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.047618407756090164, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.030594047158956528, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.025768069550395012, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.025380052626132965, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02250036597251892, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02192394807934761, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01671667769551277, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017076270654797554, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015309140086174011, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012435946613550186, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17223450541496277, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16057419776916504, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15627816319465637, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14064833521842957, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08129504323005676, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07667235285043716, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0919404849410057, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08490907400846481, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08268779516220093, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07237023115158081, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06824811547994614, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.046920131891965866, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04075495898723602, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03910643607378006, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.038718756288290024, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023481061682105064, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020290708169341087, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02010941132903099, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018489105626940727, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.018244262784719467, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012413608841598034, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01258423924446106, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011780139058828354, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008581841364502907, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2621512711048126, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24512894451618195, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23938719928264618, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21550972759723663, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12382706254720688, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.1173141598701477, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13882769644260406, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12819556891918182, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12571634352207184, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11044906079769135, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10415589809417725, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07087026536464691, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.061395592987537384, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05941731855273247, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05895450711250305, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03543230518698692, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.030522074550390244, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.030301090329885483, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027781615033745766, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.027481747791171074, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01862497255206108, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01840773969888687, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01782303862273693, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012034125626087189, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.234821155667305, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20610108971595764, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1923181116580963, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17250846326351166, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.1066899448633194, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09404219686985016, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13228151202201843, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12044215202331543, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11090312898159027, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09106960892677307, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08724936097860336, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06719597429037094, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05772784724831581, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05148494988679886, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.049919284880161285, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.033902522176504135, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027169696986675262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02650628425180912, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024134786799550056, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023131242021918297, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018387556076049805, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018156031146645546, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016247069463133812, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012651165015995502, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11197934299707413, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10142366588115692, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09275156259536743, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08344908058643341, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.051592566072940826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04498939961194992, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06930860877037048, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06256284564733505, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.053299032151699066, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04554607719182968, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04412635415792465, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03535383939743042, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029965918511152267, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024981437250971794, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.023686856031417847, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017704006284475327, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01318768784403801, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012516584247350693, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011997285299003124, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011184830218553543, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009287123568356037, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009151722304522991, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007373610977083445, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006163637153804302, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09596790373325348, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08606074005365372, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07422896474599838, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06699123978614807, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04349573701620102, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03551109507679939, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06474219262599945, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.058533694595098495, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04531829431653023, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.038564227521419525, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.037997789680957794, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.032753486186265945, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027871090918779373, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02112690918147564, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019220655784010887, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016409732401371002, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011222186498343945, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010207518935203552, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01027440745383501, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009068209677934647, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008580002933740616, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008316991850733757, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006105477921664715, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005381662864238024, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.24311289191246033, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21579186618328094, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2020142674446106, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1804242581129074, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.1109907254576683, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0985003188252449, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14133046567440033, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1268051564693451, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11561893671751022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09614827483892441, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09221114963293076, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0724177211523056, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06062205880880356, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05346444621682167, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05162838473916054, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03633313626050949, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.027606802061200142, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0267226193100214, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.024588601663708687, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.023407839238643646, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01871035248041153, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017862990498542786, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015439219772815704, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01136041060090065, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23857782781124115, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2095240205526352, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19961151480674744, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.16421253979206085, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10963327437639236, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09865035116672516, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12780702114105225, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11693478375673294, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11328189074993134, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08797290176153183, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07892457395792007, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06554988771677017, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05614902079105377, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.052855223417282104, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05206567049026489, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032824642956256866, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02768041007220745, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02730463445186615, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023406866937875748, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02285628765821457, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017560234293341637, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017718974500894547, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01620764657855034, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012385365553200245, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1561475545167923, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14527499675750732, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1410813331604004, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.12691953778266907, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07361812144517899, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06920725852251053, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08381220698356628, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07736202329397202, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07496704906225204, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06547756493091583, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.061810992658138275, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04282499849796295, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03714027628302574, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03544185683131218, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03503458574414253, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021427322179079056, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.01841876097023487, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01822621189057827, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01678638532757759, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.016530318185687065, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011345487087965012, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011516118422150612, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.010694906115531921, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007889149710536003, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23800700902938843, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22213813662528992, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21659986674785614, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19490353763103485, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11239475011825562, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10620637983083725, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1267378330230713, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11688465625047684, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11420388519763947, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10013372451066971, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09445764869451523, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0647168755531311, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05603865906596184, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05400250107049942, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05351465940475464, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03240326792001724, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.027919067069888115, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02769842930138111, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02544042095541954, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02513914555311203, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01715688407421112, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01715037040412426, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01633504033088684, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011540329083800316, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.21131020784378052, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18436118960380554, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.171217679977417, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.15264518558979034, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.0960453599691391, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08398692309856415, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11926091462373734, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10906398296356201, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.0999951958656311, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08121122419834137, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07764513045549393, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.060986679047346115, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05257010459899902, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04656563699245453, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.045054078102111816, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030892491340637207, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.024923333898186684, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02428700216114521, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02210439182817936, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.021141674369573593, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01700964942574501, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017128443345427513, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01501091942191124, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012403643690049648, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10708410292863846, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.0969795286655426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08919189125299454, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08027912676334381, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.049169864505529404, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0432550385594368, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06572011113166809, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05901694670319557, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05098041519522667, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.043556395918130875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.042209140956401825, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0335443839430809, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028262073174118996, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.023793039843440056, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.022629957646131516, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016792278736829758, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012541328556835651, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011952819302678108, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011443029157817364, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010714942589402199, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008817901834845543, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008645748719573021, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007052520755678415, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005826517473906279, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09393507242202759, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08557549864053726, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07702474296092987, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06931892037391663, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0430663600564003, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.037000544369220734, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.060231998562812805, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.054316673427820206, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.044502027332782745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.038404665887355804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.037409283220767975, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03054625727236271, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02595658227801323, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.0208286065608263, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019451431930065155, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015281813219189644, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010936942882835865, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01019347831606865, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010013250634074211, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009130437858402729, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007982688024640083, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007700122892856598, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006067282985895872, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004936825018376112, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2577952742576599, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.235160231590271, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.22501568496227264, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.20225277543067932, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11994430422782898, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.11007203161716461, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14441660046577454, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13092850148677826, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.12348821759223938, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10575418174266815, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.10071456432342529, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07387097924947739, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06268910318613052, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05760946124792099, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.056349921971559525, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03702409937977791, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.029539410024881363, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02892943285405636, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.026638824492692947, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.025811659172177315, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019065164029598236, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0183237437158823, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01672402396798134, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011515209451317787, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20266087353229523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17233198881149292, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1623818427324295, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.13988114893436432, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09326829761266708, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08091047406196594, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10988215357065201, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09893399477005005, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09665191918611526, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0745241791009903, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.06774283200502396, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05637504160404205, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.047520577907562256, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04491821303963661, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04429413750767708, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.028193147853016853, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.023475896567106247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.023191770538687706, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01978948526084423, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.01936005800962448, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014958985149860382, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014957585372030735, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.013738812878727913, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010424820706248283, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.14780326187610626, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1373344212770462, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13322818279266357, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.11984238773584366, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06964458525180817, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06539545953273773, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07951850444078445, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07335525006055832, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07096762210130692, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06188647076487541, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.058498211205005646, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04067046567797661, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03526240587234497, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03358135372400284, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.033179156482219696, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02036471478641033, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.017582179978489876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01738767698407173, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016039138659834862, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01578603684902191, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010879887267947197, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01117635890841484, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01023710984736681, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007862157188355923, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.20238561928272247, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.18860507011413574, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1836494505405426, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1653154194355011, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09614928811788559, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.09078849852085114, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1087176725268364, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10031038522720337, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09776702523231506, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0857279896736145, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08107699453830719, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05617733672261238, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04904256388545036, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04719952493906021, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.046775296330451965, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02831810899078846, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02605746127665043, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.025866150856018066, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02414068579673767, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.023886676877737045, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016015689820051193, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.0181762482970953, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015330623835325241, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.014625228010118008, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.20796218514442444, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1829293966293335, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17137368023395538, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.15157893300056458, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09505004435777664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08419716358184814, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11675800383090973, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10608693957328796, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.0982811376452446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08021171391010284, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07590215653181076, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05914938449859619, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0508696623146534, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04594437777996063, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04472046718001366, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02974008396267891, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.024388954043388367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.023859787732362747, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02154197171330452, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.020746441558003426, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016025224700570107, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016368631273508072, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014313260093331337, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011688199825584888, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10785636305809021, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09737962484359741, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0877947211265564, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07898496091365814, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04953283071517944, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0423893928527832, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06870569288730621, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06170514225959778, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05132267624139786, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.043696943670511246, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04273692145943642, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03508109226822853, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029611704871058464, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024051997810602188, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.022570418193936348, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017581578344106674, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012800535187125206, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.01203228160738945, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011666662991046906, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010745957493782043, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0092688063159585, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009125202894210815, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007148542441427708, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006210469175130129, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09561997652053833, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08582838624715805, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07359105348587036, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.066422238945961, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0433405265212059, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03503322973847389, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06571037322282791, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05896333232522011, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04520269110798836, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03849269449710846, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03820877894759178, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03341890126466751, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.028192048892378807, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.021083194762468338, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01905464008450508, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016785508021712303, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011236310936510563, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010181302204728127, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010308376513421535, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009053697809576988, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00873532984405756, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008435788564383984, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0060249571688473225, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005516245029866695, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23641936480998993, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20897138118743896, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19356685876846313, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.17341850697994232, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10757742822170258, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09423330426216125, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14039957523345947, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12533704936504364, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11242906749248505, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.093071848154068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0900617316365242, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07224240154027939, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.059964582324028015, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.051832180470228195, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04971972852945328, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0363871231675148, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.0269089937210083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025881009176373482, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0240146704018116, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022637279704213142, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018836697563529015, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017808474600315094, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015012490563094616, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01145913079380989, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2057046890258789, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17806746065616608, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1685846894979477, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.14572295546531677, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09403646737337112, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08317055553197861, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11129321902990341, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10124310106039047, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09742867201566696, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07735777646303177, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07169156521558762, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05732545256614685, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0489603653550148, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.045763447880744934, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.044971778988838196, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.028862837702035904, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.024620911106467247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02427397482097149, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02148197405040264, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.020977109670639038, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015950482338666916, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01669156923890114, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014745884574949741, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0126063646748662, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1380513310432434, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.12855742871761322, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12488118559122086, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.11239127814769745, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06551016867160797, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06163620576262474, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07458598166704178, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06873851269483566, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.06662502139806747, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05836215242743492, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.05525019019842148, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03854386880993843, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.033627916127443314, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.032188110053539276, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03183657303452492, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.019457124173641205, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.01777026243507862, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.017616529017686844, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016452176496386528, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.016254322603344917, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010997170582413673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01242754701524973, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.010471745394170284, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009986814111471176, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.14411918818950653, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13438159227371216, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13077528774738312, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.11771897226572037, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06839112937450409, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.0645221397280693, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07731955498456955, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07135557383298874, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.06952975690364838, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06093635782599449, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.057622410356998444, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.039914608001708984, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03477303683757782, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.033442460000514984, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.033126309514045715, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020090971142053604, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.018267441540956497, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.018128404393792152, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016882477328181267, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.016697246581315994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011230806820094585, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012535789981484413, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.010733980685472488, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00989906582981348, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.1831127107143402, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.16183947026729584, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.15226460993289948, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1326562613248825, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.0844724178314209, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.07559634745121002, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.10408591479063034, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.09286391735076904, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.08668278157711029, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07067857682704926, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.06668183952569962, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05264241248369217, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.045255228877067566, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.041523922234773636, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04058358818292618, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.027038592845201492, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02311873622238636, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.022735824808478355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02065485157072544, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.020104406401515007, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.015701791271567345, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016618965193629265, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014520278200507164, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013272781856358051, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10326644033193588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09310716390609741, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08398956060409546, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07551044225692749, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04733443632721901, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04059838876128197, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06555162370204926, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05893155559897423, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.0491073876619339, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04172248765826225, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04079915210604668, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.033514343202114105, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028255628421902657, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.022962557151913643, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02155134454369545, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016760580241680145, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012193381786346436, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011468279175460339, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011110838502645493, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010228208266198635, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008819700218737125, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008673437871038914, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.006814008112996817, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005871197674423456, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09299731254577637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08337725698947906, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07191261649131775, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06478126347064972, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04209347069263458, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03431283310055733, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06376770883798599, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05676765739917755, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.043908875435590744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03727663308382034, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03693924471735954, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03232943266630173, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02698662132024765, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020469902083277702, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.018612544983625412, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01617305912077427, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010894295759499073, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009929369203746319, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009965972043573856, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008813992142677307, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008441660553216934, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008102014660835266, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005956175737082958, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005309089552611113, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23920321464538574, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20917513966560364, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1921115517616272, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.17191697657108307, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10837060958147049, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09354584664106369, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14237666130065918, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12789246439933777, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11386720091104507, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09294123202562332, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09004469960927963, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0732111930847168, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06111384928226471, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.052222125232219696, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04991210624575615, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03677864000201225, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.026990503072738647, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02587820589542389, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023894252255558968, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022364625707268715, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018955377861857414, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01784934476017952, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014964205212891102, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011213255114853382, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.13328799605369568, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11518557369709015, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10954942554235458, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.09292880445718765, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05945797264575958, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.05266506224870682, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.070925772190094, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06288640201091766, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.06127329543232918, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.048364922404289246, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04503626376390457, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03567265719175339, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.032437536865472794, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.03102874755859375, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.030734887346625328, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018861761316657066, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.019514957442879677, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.019389702007174492, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01787441596388817, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.017706768587231636, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.011674856767058372, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01589846983551979, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.011169946752488613, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014395814388990402, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1240062415599823, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.11595770716667175, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.11297714710235596, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.10168100148439407, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.05855456739664078, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.05532624572515488, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.06628221273422241, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.061023954302072525, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.05943656712770462, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05218450725078583, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.04937111586332321, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0338556170463562, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.029266707599163055, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.028151758015155792, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.02788504585623741, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01692502573132515, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.014570708386600018, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01444170717149973, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.013282136991620064, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.013114633969962597, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.008926835842430592, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.00899640191346407, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.008481505326926708, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0060971518978476524, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.08305786550045013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.07745712995529175, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.07543276995420456, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.0677947849035263, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.03917410224676132, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.0369502492249012, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.044387202709913254, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.04085097834467888, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.03977368772029877, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.03485137224197388, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.03293849527835846, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.022738058120012283, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.019666248932480812, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.018908757716417313, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.018729159608483315, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.011423715390264988, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.009970519691705704, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.009881890378892422, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.009125404991209507, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.00901353731751442, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.006211332511156797, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.006406028755009174, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.005923778750002384, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.004639748018234968, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.12216383218765259, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.10993015021085739, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.102997787296772, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.088917575776577, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.056587107479572296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.05086273327469826, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.07275213301181793, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.06422993540763855, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.05822502449154854, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.048394933342933655, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.04596484825015068, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.03736888989806175, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.031479451805353165, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.028091944754123688, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.027215907350182533, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.019391458481550217, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.016022903844714165, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.015668677166104317, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.01460889633744955, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.014130556024610996, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01152767799794674, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.012019426561892033, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.010396884754300117, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.009852752089500427, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] } ], "last_module_idx": 66, "base_perplexity": 3.6769677152064584 }