{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.01275943685323, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.011119289323687553, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.005508486181497574, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.005810607224702835, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.005810391623526812, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0023400583304464817, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.012495595030486584, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.011038575321435928, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.006007869727909565, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.005367663688957691, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.005599402356892824, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.005812550894916058, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.005366429686546326, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0031715892255306244, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0024158074520528316, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0030812781769782305, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.0021889826748520136, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.001869905274361372, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0021333058830350637, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0018205720698460937, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0020374697633087635, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0021331983152776957, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0016388832591474056, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0017997428076341748, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.01275943685323, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.01275943685323, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.012992233037948608, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.011369682848453522, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.005543082486838102, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.005824891850352287, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.005824476946145296, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.002223717514425516, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.013150682672858238, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.011271754279732704, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.006013189442455769, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0053520421497523785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0055928705260157585, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.005862193647772074, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.005349715240299702, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.0030905127059668303, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.0022589873988181353, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.003059195354580879, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0019952328875660896, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0015972030814737082, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0019282781286165118, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0015341489342972636, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0019382128957659006, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0019281160784885287, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0014696221332997084, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.001508465618826449, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.012992233037948608, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.012992233037948608, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.12567944824695587, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07624762505292892, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.045995209366083145, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.054298561066389084, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.05425161123275757, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.025414051488041878, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.08636703342199326, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0707441195845604, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.059403762221336365, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03445873036980629, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04163304343819618, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.047166608273983, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.034169454127550125, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.026377998292446136, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.024234898388385773, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.024345194920897484, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.014011334627866745, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012005669996142387, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01031920500099659, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008603853173553944, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012475848197937012, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01026860997080803, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007827159017324448, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006759194657206535, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.014011334627866745, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.014011334627866745, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12470738589763641, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08068203926086426, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.04680238291621208, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.05593650043010712, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.055253662168979645, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.028925040736794472, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.08520190417766571, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.07600671797990799, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05949315428733826, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.037243012338876724, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.040360622107982635, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.043622519820928574, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.036724600940942764, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.027638383209705353, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025070123374462128, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.022022901102900505, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015951842069625854, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013987942598760128, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013025496155023575, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011376021429896355, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01221339963376522, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01295325718820095, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.009367065504193306, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010071592405438423, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015951842069625854, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015951842069625854, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1401272714138031, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.12913364171981812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12556256353855133, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11349941045045853, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06374537199735641, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06051325052976608, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07133525609970093, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06591308861970901, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06448621302843094, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05680740624666214, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.054157767444849014, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.036371100693941116, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.031713567674160004, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.030813496559858322, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03060496225953102, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01830398105084896, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01668025180697441, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.016484582796692848, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01543144416064024, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.015304679982364178, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010248910635709763, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01124312449246645, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.009979909285902977, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008774952031672001, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01830398105084896, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01830398105084896, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16706553101539612, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1562347710132599, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15281596779823303, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13807502388954163, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07648147642612457, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07332564145326614, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08496138453483582, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07846009731292725, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07718880474567413, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06872232258319855, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06540591269731522, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04292901232838631, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03723548352718353, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03641127049922943, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03621455281972885, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021411996334791183, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01868291012942791, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01847746968269348, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017159629613161087, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017038162797689438, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011248787865042686, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011205220595002174, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010981280356645584, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00739034079015255, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017159629613161087, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017159629613161087, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.09565626084804535, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.08125251531600952, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.07243717461824417, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.06323856860399246, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.042725928127765656, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.036145664751529694, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.06186182051897049, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.05147887393832207, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.04450037330389023, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.03493631258606911, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.033785659819841385, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.029331734403967857, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.024770062416791916, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.021476365625858307, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.020651282742619514, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.015540581196546555, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.01283046044409275, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.012478374876081944, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.011603306978940964, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.011167890392243862, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.010100807063281536, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.010198396630585194, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.00933085847645998, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.00871196947991848, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.015540581196546555, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.015540581196546555, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.02287108823657036, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.015712715685367584, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.008746277540922165, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.009634721092879772, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.009407475590705872, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.004084444604814053, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.016554618254303932, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.014948414638638496, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.01071622222661972, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.007254754193127155, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.007806919515132904, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.008360084146261215, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.00713199470192194, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.004718300886452198, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.003956557251513004, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0042062439024448395, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.0027354343328624964, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.002259301720187068, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.002381716389209032, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0019170595332980156, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0022943976800888777, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0023630138020962477, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0015301983803510666, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.001736532198265195, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.015712715685367584, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.015712715685367584, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.020175188779830933, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.014209473505616188, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0075827972032129765, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.008352728560566902, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.008184258826076984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0033005522564053535, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.015452589839696884, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.013670653104782104, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.00936564989387989, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0065623680129647255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.00712616229429841, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.007686636410653591, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.006476155482232571, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.004084754269570112, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.0032949764281511307, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.003864276222884655, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0023265432100743055, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0018530639354139566, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0020528752356767654, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0015678316121920943, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0020455997437238693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.002039017854258418, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0012372051132842898, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0014230022206902504, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.014209473505616188, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.014209473505616188, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.15830948948860168, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1047891303896904, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07353945076465607, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07324128597974777, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.06801234930753708, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.039009809494018555, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09911498427391052, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.08932316303253174, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.07542908191680908, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04610365256667137, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.047856654971838, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05042439326643944, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.042802635580301285, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.033054184168577194, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.030340299010276794, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02524496801197529, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01752402074635029, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.01519844587892294, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.013372626155614853, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01128772646188736, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013124565593898296, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012794775888323784, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.009654898196458817, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008352130651473999, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01752402074635029, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01752402074635029, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1838000863790512, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1499737948179245, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.13440856337547302, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.11081787943840027, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0850515216588974, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0704389363527298, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10902348160743713, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09630917012691498, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08770690113306046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.062408484518527985, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.05872563645243645, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.056059256196022034, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.046907082200050354, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04189101234078407, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.040622711181640625, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02835037373006344, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.023327739909291267, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.022269627079367638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.019342714920639992, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01851184293627739, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015788232907652855, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01695355214178562, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014143042266368866, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01339140348136425, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015788232907652855, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015788232907652855, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.19264408946037292, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1804179847240448, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1763969212770462, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.15863777697086334, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0901019498705864, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08630970865488052, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1001751497387886, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09214536845684052, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09083645790815353, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08032669126987457, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07645135372877121, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05137832462787628, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04476577043533325, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.043908488005399704, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0437028706073761, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02592093124985695, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02406684309244156, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.023851707577705383, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.022302575409412384, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02216939814388752, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014662657864391804, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016485081985592842, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.014418793842196465, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013186393305659294, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014662657864391804, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014662657864391804, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23718956112861633, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22306382656097412, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2186088263988495, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19693884253501892, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11142510920763016, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1069902628660202, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1232440248131752, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11360762268304825, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11227288097143173, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09968867897987366, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09474819153547287, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06306426227092743, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.054797615855932236, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05386107414960861, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05363118276000023, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03161054477095604, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.0287646297365427, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028501544147729874, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026536084711551666, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02639879286289215, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017339268699288368, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018782183527946472, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01704525575041771, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.014239412732422352, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017339268699288368, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017339268699288368, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.019266938790678978, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.01885264366865158, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.0076510668732225895, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.007226265501230955, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.00658221822232008, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0033396000508219004, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.0189841128885746, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.018339574337005615, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.006654959637671709, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0063182394951581955, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.006278901360929012, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.006189986132085323, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.006095702759921551, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.003076673485338688, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.0026357974857091904, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0029497791547328234, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.0023627006448805332, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.0010237663518637419, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.002323723863810301, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.0009342676494270563, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.002364848740398884, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.002294095465913415, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.0007721451111137867, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0007990803569555283, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.0076510668732225895, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.0076510668732225895, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.06495501846075058, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.05045539513230324, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.04174366593360901, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.03774729371070862, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.028634516522288322, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.021145163103938103, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04011400416493416, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03646942228078842, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.030654994770884514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02193349041044712, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.021758345887064934, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02034274861216545, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0173930786550045, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.013885165564715862, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.012944113463163376, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010168259032070637, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.007388019934296608, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.006684990134090185, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.006112394854426384, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0054565491154789925, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005302106961607933, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005313909612596035, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.004083487205207348, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003572427434846759, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0173930786550045, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0173930786550045, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.06911342591047287, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05154700204730034, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.040002018213272095, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0371670201420784, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.02982129156589508, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.020238002762198448, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04379921406507492, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03985271602869034, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03242724388837814, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.022469526156783104, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.022665148600935936, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.022185154259204865, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01895999163389206, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.014523191377520561, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01327371783554554, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.011087493970990181, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.007769681978970766, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.006873096339404583, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006389076821506023, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0055337464436888695, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005769968498498201, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0057839746586978436, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004252695478498936, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0038943744730204344, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.014523191377520561, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.014523191377520561, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.19718685746192932, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16189542412757874, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.14684905111789703, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1293652504682541, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09008070826530457, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07464925199747086, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11240565031766891, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10175859183073044, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09406094253063202, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.06994215399026871, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06696600466966629, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.057254016399383545, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.048697762191295624, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04337214305996895, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04203709959983826, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.028685111552476883, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02236759290099144, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.021129224449396133, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018305977806448936, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01734750345349312, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014992231503129005, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014336202293634415, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013048535212874413, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00910595990717411, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018305977806448936, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018305977806448936, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.18523778021335602, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16438762843608856, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15435732901096344, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13252051174640656, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08629386872053146, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07719441503286362, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10550233721733093, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09575586020946503, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08826154470443726, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0716003030538559, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06662970036268234, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.054685868322849274, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.046655815094709396, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.042470093816518784, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04145924001932144, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02788003720343113, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02360135316848755, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.022831108421087265, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020944230258464813, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.020320504903793335, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015901688486337662, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0169677734375, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014603917486965656, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013466490432620049, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015901688486337662, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015901688486337662, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24022045731544495, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22429881989955902, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21923793852329254, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19675061106681824, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11357422173023224, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1082477942109108, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12623921036720276, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11636614054441452, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11465753614902496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10028237104415894, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0947880670428276, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06453792005777359, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.055879175662994385, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05463957414031029, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05435957387089729, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03231057524681091, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02854212559759617, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028222516179084778, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025926243513822556, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025744954124093056, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017331143841147423, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017831064760684967, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01693565584719181, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012636232189834118, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017331143841147423, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017331143841147423, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.27685660123825073, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2589142322540283, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.25324031710624695, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.22738809883594513, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.1308685541152954, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1248302236199379, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14488109946250916, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13390138745307922, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.13204286992549896, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1157354936003685, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10937178879976273, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07381974160671234, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06405258923768997, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.06271402537822723, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06239810213446617, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.036872927099466324, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.032191891223192215, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.0318245105445385, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.029116550460457802, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02891577035188675, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019317595288157463, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.019274428486824036, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018881265074014664, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012685578316450119, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012685578316450119, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012685578316450119, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21676424145698547, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18902379274368286, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17833027243614197, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1545591503381729, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09818650782108307, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08779982477426529, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11689793318510056, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10653211176395416, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1013447493314743, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08114159852266312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07579910010099411, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05936691537499428, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.051004745066165924, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.0472952201962471, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.046386390924453735, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029832281172275543, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025041408836841583, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02461119182407856, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02178850583732128, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021203376352787018, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016205022111535072, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016523050144314766, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014997638761997223, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011919230222702026, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016205022111535072, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016205022111535072, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.05012078955769539, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.041526731103658676, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.036098137497901917, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.03194568678736687, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.022632941603660583, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.018191147595643997, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.030887315049767494, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.028027066960930824, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.02373027801513672, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.018091464415192604, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.01762050949037075, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.01569598726928234, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.01338527537882328, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.010991223156452179, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.010357019491493702, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.007858061231672764, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.005861281417310238, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.005419644061475992, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.004996471107006073, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.004583275411278009, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0041720070876181126, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004170331172645092, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0033488876651972532, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.002888706047087908, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.018091464415192604, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.018091464415192604, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.05120737850666046, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.04133869707584381, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.034342553466558456, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.030622031539678574, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.02272220142185688, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.017164330929517746, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.032779209315776825, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.02961278147995472, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.02404228039085865, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.017968716099858284, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.01784524880349636, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01660379022359848, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.014114826917648315, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.010986670851707458, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.010116909630596638, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.008294081315398216, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.005776511039584875, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0051724109798669815, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.004856356419622898, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.004254857078194618, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004293343983590603, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004157469142228365, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0031715116929262877, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0026452455203980207, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.017968716099858284, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.017968716099858284, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20368115603923798, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17093627154827118, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.15681910514831543, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.13750649988651276, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09334611147642136, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07948873937129974, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11461208760738373, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10496007651090622, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09699926525354385, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0738513395190239, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07034783810377121, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.058255139738321304, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.050084393471479416, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04479771479964256, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04347536340355873, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029100770130753517, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.023012207821011543, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.021832751110196114, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01907731406390667, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.018131505697965622, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014987739734351635, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014552095904946327, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013078160583972931, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009042911231517792, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.018131505697965622, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.018131505697965622, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19220387935638428, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16975539922714233, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.16095903515815735, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13600927591323853, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08984604477882385, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08147896826267242, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10616087913513184, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09641347080469131, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09152523428201675, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0727917030453682, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06675717979669571, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.054602012038230896, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.046803027391433716, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04389543458819389, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.043218355625867844, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.027567649260163307, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02400200627744198, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023405829444527626, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020862553268671036, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02042797952890396, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015337876975536346, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016634685918688774, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014410716481506824, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013009438291192055, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015337876975536346, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015337876975536346, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23846650123596191, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22252397239208221, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21743743121623993, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19549234211444855, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11295338720083237, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10749615728855133, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12544305622577667, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11585785448551178, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11404889822006226, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09975531697273254, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09447642415761948, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06408294290304184, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05556382238864899, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05427036061882973, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05395827814936638, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032065995037555695, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028180858120322227, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027836676687002182, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025550255551934242, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02535898983478546, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017070258036255836, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017364712432026863, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016636237502098083, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012008548714220524, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017070258036255836, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017070258036255836, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.27891239523887634, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2607352137565613, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2549844980239868, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.22942373156547546, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.1321885734796524, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1259755939245224, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14692236483097076, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13544616103172302, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.13348613679409027, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11697022616863251, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.1108546033501625, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07505883276462555, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06488144397735596, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.06342753022909164, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06308087706565857, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03754644840955734, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.03262075036764145, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.0322284996509552, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.029534991830587387, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.029311997815966606, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019944170489907265, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.019635602831840515, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.019463935866951942, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0130162900313735, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0130162900313735, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0130162900313735, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23827290534973145, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20969444513320923, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19889584183692932, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1730913668870926, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10881037265062332, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09808186441659927, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1281501054763794, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11714062094688416, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11201410740613937, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09047941118478775, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08468645811080933, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06510637700557709, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.056075602769851685, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05234348773956299, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.051432203501462936, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03269237279891968, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027489706873893738, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02705518715083599, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02394494041800499, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023349709808826447, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017624691128730774, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017805511131882668, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01641131564974785, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012558395974338055, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017624691128730774, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017624691128730774, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.06702592968940735, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.05625208839774132, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.049177706241607666, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.04375336691737175, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.030441943556070328, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.024668898433446884, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04174841195344925, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03763701394200325, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0318269282579422, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02468031831085682, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.024120701476931572, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.021271631121635437, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.01805277168750763, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.014830037951469421, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.013978710398077965, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010689500719308853, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.00799037329852581, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.007396392524242401, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.006909151561558247, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.006354187615215778, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005709015764296055, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005765927955508232, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00458691967651248, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00409115944057703, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.01805277168750763, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.01805277168750763, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.06652186065912247, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05449296906590462, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.046318694949150085, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.04138997569680214, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.02969786711037159, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.02309098280966282, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.042244818061590195, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.037906937301158905, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03149786964058876, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.023836690932512283, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.023620057851076126, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.021513910964131355, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.018160521984100342, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.014423471875488758, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.013413402251899242, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.010802199132740498, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.00768913421779871, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.006999204400926828, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006576050538569689, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.005907156504690647, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0056786807253956795, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005570540204644203, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004330508876591921, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003771512769162655, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.018160521984100342, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.018160521984100342, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22013580799102783, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18759037554264069, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17388662695884705, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15319983661174774, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10129880905151367, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08781062811613083, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12277815490961075, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11245959252119064, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10499890893697739, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08168935775756836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07757064700126648, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06250408291816711, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05380699783563614, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04874077066779137, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.047482289373874664, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.031240174546837807, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02517124079167843, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02406926453113556, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021209556609392166, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020339952781796455, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01620187982916832, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015994831919670105, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014362920075654984, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010329579003155231, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01620187982916832, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01620187982916832, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19244790077209473, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17076055705547333, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1621529906988144, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14055907726287842, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0891374722123146, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0808408334851265, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10695629566907883, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09618951380252838, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09107254445552826, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0741494670510292, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06976872682571411, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.054938752204179764, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0468633696436882, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04372876510024071, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.042965397238731384, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02781437337398529, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.024022769182920456, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023400627076625824, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021265216171741486, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02080479823052883, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015563834458589554, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016857530921697617, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01455999817699194, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013274384662508965, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015563834458589554, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015563834458589554, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.21777626872062683, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20217300951480865, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.19669705629348755, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.17665976285934448, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10306573659181595, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09739474952220917, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11610133945941925, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10690769553184509, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10432476550340652, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09058684855699539, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08582833409309387, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05929151177406311, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.051310423761606216, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.049600400030612946, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04918815568089485, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.029726814478635788, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.025798136368393898, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.025406640022993088, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023307740688323975, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.023052319884300232, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015887945890426636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01605185493826866, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01530428882688284, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01111422386020422, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015887945890426636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015887945890426636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.278226763010025, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2590949237346649, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2527182400226593, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.22727271914482117, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.13193479180335999, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.125116229057312, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14760205149650574, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13611829280853271, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.13337813317775726, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11636700481176376, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.11027363687753677, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07525144517421722, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06521065533161163, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.06332455575466156, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06286881119012833, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03768269717693329, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.0325670950114727, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.03209853544831276, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.029379377141594887, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.029096757993102074, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019852949306368828, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01964583434164524, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01922552101314068, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01294859778136015, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01294859778136015, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01294859778136015, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24829550087451935, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22237050533294678, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.21288955211639404, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18657325208187103, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11416060477495193, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10469087213277817, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1334172785282135, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12143617868423462, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11704163253307343, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0969800055027008, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09069009125232697, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06788500398397446, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.058235034346580505, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05495607107877731, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.054156593978405, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03411637991666794, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028893768787384033, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02851799502968788, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02557377703487873, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.025055982172489166, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018430586904287338, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018655193969607353, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01732473447918892, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013280129060149193, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01732473447918892, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01732473447918892, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.08102847635746002, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07001844048500061, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.06310159713029861, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.056019823998212814, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.037117376923561096, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.03150222823023796, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.048961978405714035, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04440220072865486, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0384717732667923, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.030782323330640793, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.029746921733021736, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.024831540882587433, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.021234342828392982, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.018009379506111145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.017165830358862877, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012450811453163624, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.009572735987603664, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.008993801660835743, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.008355884812772274, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.00781241012737155, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0066170585341751575, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006661646533757448, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.005496849771589041, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004633863922208548, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.018009379506111145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.018009379506111145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.07670596241950989, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06514334678649902, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05728379637002945, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.050820086151361465, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03464207798242569, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.02842077612876892, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04718319699168205, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04298615828156471, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03616851568222046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.028448306024074554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.027720052748918533, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.023871876299381256, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.020461872220039368, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01674812287092209, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.015743453055620193, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.011943496763706207, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.008712168782949448, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.008028571493923664, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0074828751385211945, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.006815232336521149, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006168047897517681, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005999094340950251, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004844572860747576, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0038059118669480085, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01674812287092209, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01674812287092209, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23635442554950714, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20809409022331238, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19725647568702698, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17388083040714264, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10998384654521942, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0988815650343895, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1291724145412445, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11854273080825806, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1127854585647583, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09117800742387772, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08602988719940186, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06569357216358185, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05663759633898735, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.052708715200424194, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05177108198404312, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03276946023106575, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02681542932987213, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025953050702810287, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022956229746341705, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022291429340839386, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016831541433930397, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016218367964029312, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015395142138004303, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00982974749058485, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016831541433930397, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016831541433930397, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.22241993248462677, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2015293687582016, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1931602656841278, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1692381054162979, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10391878336668015, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09557594358921051, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12118163704872131, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11066106706857681, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10561590641736984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08809242397546768, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08254539221525192, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06250692903995514, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.053397487848997116, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05026451125741005, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04949335381388664, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03151364251971245, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.026643704622983932, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026000522077083588, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023585546761751175, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02311106026172638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017296476289629936, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017493071034550667, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016216669231653214, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012765240855515003, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017296476289629936, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017296476289629936, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.21141144633293152, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.19647647440433502, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.19118207693099976, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.17175662517547607, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10003229230642319, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09456566721200943, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11249616742134094, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10374634712934494, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10118568688631058, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.087982177734375, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08338427543640137, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05742646008729935, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04973090812563896, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.048052262514829636, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04765000194311142, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.028735440224409103, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.024829953908920288, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.024438876658678055, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.022398190572857857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.022142425179481506, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015206430107355118, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015205363743007183, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.014643257483839989, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01022788230329752, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015206430107355118, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015206430107355118, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2765046954154968, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.25807011127471924, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.25169074535369873, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.22669363021850586, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.13119067251682281, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.12456202507019043, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1467142403125763, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1353885382413864, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.13262733817100525, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11596719175577164, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.11002981662750244, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07486043870449066, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0648248940706253, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.06294863671064377, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06250586360692978, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03744758293032646, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.03232473134994507, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.03186536952853203, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.029210710898041725, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02892616204917431, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019690973684191704, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.019438641145825386, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01907268539071083, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012718326412141323, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012718326412141323, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012718326412141323, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24638298153877258, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22315289080142975, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.21424424648284912, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18896500766277313, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11386702954769135, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10504771023988724, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13283924758434296, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12097437679767609, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11636360734701157, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09796127676963806, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09193184226751328, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06781825423240662, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05801631510257721, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05479004606604576, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.0539974644780159, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03411843255162239, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028719358146190643, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.028323426842689514, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025609727948904037, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02510596625506878, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018494045361876488, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018404871225357056, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017392301931977272, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012954048812389374, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018404871225357056, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018404871225357056, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.07649534195661545, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06578436493873596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05807060748338699, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.05163629725575447, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.03495276719331741, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.028917580842971802, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04807533323764801, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04331713542342186, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.036328766494989395, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.028946861624717712, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.028220100328326225, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.024420542642474174, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.020717570558190346, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.016991375014185905, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.016002358868718147, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012240213342010975, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.009084304794669151, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.008424580097198486, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007940523326396942, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.007305016741156578, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006457592826336622, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0064908177591860294, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.005120695102959871, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004514886997640133, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.016991375014185905, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.016991375014185905, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.07277464121580124, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06148331239819527, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05297271907329559, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.04712408408522606, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.032787322998046875, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.026215579360723495, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.046387992799282074, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04191789776086807, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.034287724643945694, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.026881389319896698, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0264658834785223, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02346695028245449, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.019953476265072823, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.015883583575487137, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.014779004268348217, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.011729098856449127, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.008382164873182774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.007626845967024565, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007230015937238932, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.006486176047474146, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006113784853368998, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005972492042928934, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004656921606510878, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0038982443511486053, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.015883583575487137, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.015883583575487137, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.213927760720253, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18876846134662628, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1775648444890976, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1568218618631363, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09948558360338211, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0888146311044693, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11950996518135071, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10948392003774643, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10203483700752258, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08284642547369003, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07843296229839325, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06071174144744873, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.052282046526670456, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04778769612312317, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04666782170534134, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.030319495126605034, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02446436695754528, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.023546945303678513, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02106074057519436, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020320642739534378, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015608002431690693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015192738734185696, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013969825580716133, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009487205184996128, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015608002431690693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015608002431690693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2128264158964157, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18929126858711243, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17981548607349396, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.15728673338890076, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09932874143123627, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08961936831474304, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11760739982128143, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1065477654337883, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10120923817157745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08267620205879211, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07731056213378906, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06048939377069473, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0514628067612648, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04816173389554024, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04737038537859917, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03037581592798233, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.025700122117996216, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.024996519088745117, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02250351570546627, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022005422040820122, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0164419487118721, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01713729090988636, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01533910259604454, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01270155981183052, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0164419487118721, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0164419487118721, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.19741065800189972, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.18379606306552887, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17876483500003815, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.16105647385120392, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0934600904583931, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0883963406085968, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10542267560958862, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09723111987113953, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09454572200775146, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08252694457769394, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07832860946655273, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.053808510303497314, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04663187637925148, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.044918566942214966, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04450811445713043, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.026936503127217293, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.023271752521395683, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02289571426808834, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.021069416776299477, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02081385813653469, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014258291572332382, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014357413165271282, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.013672634027898312, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009746190160512924, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014258291572332382, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014258291572332382, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.26863187551498413, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2508891820907593, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2446412742137909, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.22063209116458893, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12750908732414246, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.12102096527814865, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14295801520347595, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13190853595733643, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12889264523983002, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11288970708847046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.1071745902299881, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07291217893362045, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.063177689909935, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.061191026121377945, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.060724932700395584, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03644562512636185, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.031433217227458954, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.03096669539809227, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.028447892516851425, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.028147529810667038, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019098205491900444, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01894594170153141, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018420765176415443, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012399163097143173, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012399163097143173, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012399163097143173, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24719326198101044, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2258451133966446, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.21794350445270538, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.19424472749233246, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11469901353120804, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10674823075532913, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1332968771457672, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1209644004702568, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11701895296573639, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.1000649556517601, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09471768140792847, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06785054504871368, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0580345094203949, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05520257353782654, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05453195050358772, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.034037381410598755, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028995811939239502, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.028660692274570465, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02615131065249443, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.025718901306390762, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018310613930225372, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018589559942483902, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017349667847156525, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013227255083620548, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018310613930225372, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018310613930225372, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.08293743431568146, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07360319793224335, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.06505594402551651, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.05779169872403145, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.038329847157001495, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.03205855190753937, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05388479679822922, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.048370346426963806, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.039410341531038284, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.032655488699674606, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.031974323093891144, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02750702016055584, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0232872124761343, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.01868859864771366, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.01743880659341812, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013790995813906193, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.010083922185003757, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.009290805086493492, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.009017028845846653, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.008231902495026588, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007306193932890892, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007383236661553383, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.005652999505400658, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005187653005123138, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.01743880659341812, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.01743880659341812, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.07531160861253738, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06604965031147003, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05667058378458023, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.05017174035310745, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03427628427743912, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.027658486738801003, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04983561858534813, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04521244019269943, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03539546951651573, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.029030971229076385, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.028585389256477356, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02526061423122883, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02159666083753109, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.016639450564980507, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01525010634213686, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012682837434113026, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.008810687810182571, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.007940148003399372, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0077971285209059715, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.006911139469593763, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006574397440999746, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006449645850807428, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004794538952410221, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004211754538118839, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.016639450564980507, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.016639450564980507, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22791795432567596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2040414959192276, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19411508738994598, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17135412991046906, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10661764442920685, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09679155796766281, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12614116072654724, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11501236259937286, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10888735949993134, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08970633149147034, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08475091308355331, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06429170072078705, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05500972270965576, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05115414410829544, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05023034289479256, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.032075680792331696, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02618192508816719, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025354210287332535, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0227062851190567, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022074824199080467, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01656486839056015, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016028691083192825, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01506173238158226, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01000930555164814, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01656486839056015, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01656486839056015, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.21820856630802155, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19710533320903778, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1884101927280426, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16439342498779297, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10191573947668076, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09334711730480194, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12175583839416504, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10931520164012909, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.1038106307387352, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08621449768543243, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.081074558198452, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06267035752534866, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05313017591834068, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.049869731068611145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04908112809062004, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03163696080446243, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027216291055083275, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026538919657468796, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024283114820718765, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02379848062992096, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017624754458665848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018825097009539604, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016600269824266434, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01467045210301876, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017624754458665848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017624754458665848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1908998042345047, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17817851901054382, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17362143099308014, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.15662682056427002, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.09054502844810486, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08586696535348892, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10169584304094315, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09374743700027466, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09154831618070602, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08011996746063232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07616512477397919, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05198978632688522, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04500763490796089, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04355897754430771, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0432143434882164, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0260143019258976, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02265249751508236, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.022321190685033798, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02056361548602581, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.020347602665424347, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013835779391229153, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014065009541809559, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.013347746804356575, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009722629562020302, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013835779391229153, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013835779391229153, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2686653733253479, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2514193654060364, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.24553550779819489, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.22180120646953583, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12779033184051514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.12158344686031342, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14294026792049408, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13170820474624634, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12907668948173523, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11336508393287659, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10782422870397568, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07311040163040161, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06318815052509308, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.06141306087374687, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06099030002951622, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.036584313958883286, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.031722716987133026, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.03129525110125542, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.028799759224057198, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02853338234126568, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019413786008954048, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.019354065880179405, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0188059750944376, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013051639311015606, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013051639311015606, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013051639311015606, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23920321464538574, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.217992901802063, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.2100324183702469, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18735584616661072, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11082608997821808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10287046432495117, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12864446640014648, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11713260412216187, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1131426990032196, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09645292907953262, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09142594784498215, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0656050443649292, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05614612251520157, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.053301047533750534, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.052617959678173065, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03291955590248108, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027888260781764984, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027545539662241936, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025068404152989388, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02463589794933796, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017671288922429085, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017761142924427986, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016720382496714592, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012471767142415047, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017671288922429085, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017671288922429085, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.0809842050075531, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07117723673582077, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.06493458896875381, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.057156071066856384, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.037323229014873505, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0322340652346611, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.048177946358919144, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04403451085090637, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.03839053586125374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.031142812222242355, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.029839161783456802, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02447769045829773, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.021037518978118896, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.018040070310235023, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.017277514562010765, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012258199974894524, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.00950411707162857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.008958198130130768, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.008298376575112343, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.007794437929987907, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006445575971156359, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006475752219557762, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.005408027675002813, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004396394360810518, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.018040070310235023, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.018040070310235023, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.07386719435453415, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06363579630851746, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05693390965461731, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.049931850284338, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03354819118976593, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.028136542066931725, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04454325884580612, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04062208533287048, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03474180027842522, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.027550920844078064, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.02658085897564888, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.022622087970376015, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0193875040858984, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01618206687271595, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01535378023982048, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01132361963391304, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.008465598337352276, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.007873406633734703, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007287466898560524, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.006727590691298246, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005888796877115965, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005789778660982847, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004770476371049881, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003784175729379058, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01618206687271595, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01618206687271595, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.21119599044322968, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18665440380573273, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1765526980161667, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15491317212581635, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09805329889059067, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0881473645567894, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1175428032875061, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10638872534036636, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10045906901359558, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08143850415945053, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07686009258031845, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05981731414794922, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0509364940226078, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.047113001346588135, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04617699608206749, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029898623004555702, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024289511144161224, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02345114015042782, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020923597738146782, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02028358168900013, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015478981658816338, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015176392160356045, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01396037545055151, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009828539565205574, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015478981658816338, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015478981658816338, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.21922922134399414, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19999195635318756, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1918427050113678, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16885672509670258, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10240467637777328, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09483589977025986, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12068427354097366, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10956878960132599, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10413987189531326, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08774584531784058, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08243580907583237, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06193884462118149, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.052731096744537354, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.049488089978694916, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04871559143066406, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031031083315610886, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02610182575881481, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.025462837889790535, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02323724515736103, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022724997252225876, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016587650403380394, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017002079635858536, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01553458534181118, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012175437062978745, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016587650403380394, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016587650403380394, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1834942251443863, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17146101593971252, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16705451905727386, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.15091998875141144, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08698525279760361, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08252827078104019, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0978824645280838, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09019827097654343, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08793879300355911, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07710130512714386, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07334383577108383, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04998030513525009, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.043253667652606964, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04181631654500961, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.041477106511592865, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02500622160732746, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.021683068946003914, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02136106975376606, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019690051674842834, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.019477400928735733, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013220982626080513, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01338515616953373, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012733670882880688, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009141076356172562, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013220982626080513, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013220982626080513, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2626471519470215, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24597632884979248, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.24008598923683167, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.21693119406700134, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12485132366418839, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11871440708637238, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13984419405460358, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12880001962184906, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12611572444438934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11072095483541489, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10526491701602936, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07139376550912857, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.061695780605077744, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05991334468126297, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05948702618479729, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.035695336759090424, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.030795086175203323, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.030372144654393196, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027916742488741875, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.027644870802760124, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01873595267534256, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018576616421341896, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01812022738158703, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01223315391689539, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01812022738158703, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01812022738158703, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23883086442947388, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2184232920408249, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.21095609664916992, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1882353276014328, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11071617156267166, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10321302711963654, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12803371250629425, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1165890023112297, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11288921535015106, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09670726209878922, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0914267897605896, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06523554772138596, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05589746683835983, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.053261514753103256, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05261689051985741, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03279504179954529, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02792266011238098, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027605077251791954, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025195712223649025, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02478698454797268, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01773068681359291, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017824161797761917, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016849249601364136, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012613795697689056, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01773068681359291, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01773068681359291, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1015545055270195, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08973906189203262, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08229447156190872, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0725463479757309, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.046892616897821426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04083774983882904, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06054271012544632, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05485399439930916, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.048148032277822495, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03934046998620033, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03772822394967079, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.030813051387667656, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.026311524212360382, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.022752465680241585, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0218415018171072, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01546213123947382, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012137548997998238, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011507788673043251, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010691064409911633, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010114306584000587, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008282438851892948, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008407222107052803, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0070380233228206635, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00597863644361496, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01546213123947382, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01546213123947382, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09195046871900558, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0803050696849823, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0725734606385231, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06375870108604431, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.042012084275484085, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.035783570259809494, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05529123544692993, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05019064620137215, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.043342944234609604, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03491203859448433, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03367401286959648, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.028169434517621994, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.024034207686781883, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.020274920389056206, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019302239641547203, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014087256975471973, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010608096607029438, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.00991249829530716, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009206602349877357, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008554385043680668, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007342277094721794, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007207895163446665, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006005952600389719, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004723715595901012, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014087256975471973, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014087256975471973, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2178453505039215, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1963426023721695, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18808968365192413, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16486164927482605, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10204894840717316, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09360203891992569, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1183425635099411, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10834746807813644, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1037730723619461, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08572037518024445, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08057589828968048, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06013111025094986, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05173538625240326, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.048857368528842926, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04814928397536278, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.030030082911252975, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024829789996147156, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02416982874274254, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021466832607984543, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02099517732858658, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01538592204451561, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014843454584479332, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014288114383816719, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009040804579854012, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01538592204451561, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01538592204451561, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.217198446393013, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19780291616916656, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18958869576454163, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16726580262184143, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10165482759475708, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09393689781427383, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11912326514720917, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10846021771430969, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10325387865304947, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08651862293481827, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08227308094501495, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.061002716422080994, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05211157724261284, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.048956625163555145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04820835217833519, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.030515028163790703, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.025523515418171883, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.0248887799680233, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022509479895234108, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022015517577528954, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016094377264380455, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01623203046619892, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01505210343748331, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011197603307664394, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016094377264380455, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016094377264380455, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.18110162019729614, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16885358095169067, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16441424190998077, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1484232246875763, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08582135289907455, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08128988742828369, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0967734232544899, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0890333279967308, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08678612858057022, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07588779181241989, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07225233316421509, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04953474551439285, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04278833046555519, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04133439064025879, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04098957031965256, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024818072095513344, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.021593596786260605, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02126716822385788, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019626528024673462, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.019409753382205963, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013288081623613834, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013570769689977169, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012799890711903572, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009545154869556427, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013288081623613834, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013288081623613834, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2549581229686737, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23798836767673492, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2321341335773468, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20948028564453125, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12094669789075851, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11475837230682373, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1358029842376709, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12488971650600433, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1222711130976677, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10691942274570465, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10174825042486191, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06947356462478638, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0599195621907711, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05811958760023117, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05769720301032066, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03478528931736946, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.03005913272500038, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02962731197476387, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027226518839597702, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02695830911397934, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018491491675376892, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018412314355373383, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017884420230984688, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012466118671000004, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018412314355373383, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018412314355373383, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24303212761878967, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22224028408527374, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.2143504023551941, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.19148677587509155, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11266855895519257, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10478603094816208, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13078360259532928, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1191074550151825, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11493939906358719, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0984005406498909, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09304697066545486, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06638451665639877, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05698668956756592, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05409467965364456, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05341000482439995, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03326881676912308, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028160838410258293, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027818448841571808, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02534923143684864, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024909576401114464, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01777895726263523, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017742402851581573, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016816260293126106, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012221165932714939, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01777895726263523, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01777895726263523, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.08913807570934296, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07928265631198883, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0733492448925972, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.06409411132335663, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.041313640773296356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.03644607961177826, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05204809457063675, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04744679853320122, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0422280915081501, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03451011702418327, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.032832205295562744, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.026448464021086693, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.022668031975626945, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.019926320761442184, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.019238218665122986, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013238199055194855, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.010444342158734798, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.009939320385456085, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.009112664498388767, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.00865874532610178, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006958452053368092, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006957276724278927, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.005981233902275562, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004705019760876894, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013238199055194855, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013238199055194855, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08351866900920868, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07346705347299576, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06759173423051834, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.058808911591768265, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03834577277302742, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03346250206232071, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.048895463347435, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04432666301727295, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03930164873600006, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03175680711865425, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.030213847756385803, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.024790093302726746, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.021171290427446365, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01844479888677597, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017753129824995995, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0123895900323987, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.009554088115692139, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009046993218362331, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008235331624746323, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0077627371065318584, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00641842745244503, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006264281924813986, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005406656768172979, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004021590109914541, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017753129824995995, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017753129824995995, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20364946126937866, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18223537504673004, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1740134060382843, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15152107179164886, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09521093219518661, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08683402091264725, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11220111697912216, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10152093321084976, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09703312069177628, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07925765216350555, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07432247698307037, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05715194717049599, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.048511967062950134, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.045677825808525085, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.044957149773836136, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02854081802070141, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.023359254002571106, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.022727424278855324, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02010931819677353, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.019635751843452454, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014720987528562546, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014235097914934158, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013523290865123272, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008993403054773808, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014720987528562546, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014720987528562546, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.21144157648086548, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1920141875743866, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18357311189174652, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16127873957157135, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09824641048908234, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09037396311759949, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11672438681125641, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10602232068777084, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10013601928949356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08406606316566467, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07945346087217331, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.059995293617248535, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05105400085449219, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0475468747317791, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04669451341032982, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03009885549545288, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.025188948959112167, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.024519795551896095, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022424783557653427, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021873652935028076, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016278579831123352, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016622304916381836, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015140283852815628, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012034473940730095, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016278579831123352, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016278579831123352, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17728447914123535, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16453561186790466, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15965595841407776, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1436053365468979, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0838182270526886, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07899708300828934, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09522347152233124, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08750934898853302, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0848715603351593, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07371868193149567, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07006561756134033, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04872826114296913, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04204389080405235, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04038826748728752, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.040002647787332535, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024432150647044182, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.021152138710021973, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02078993059694767, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019152533262968063, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01890828087925911, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013086476363241673, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013397330418229103, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012518948875367641, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00947677530348301, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013086476363241673, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013086476363241673, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24539689719676971, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22797361016273499, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22165192663669586, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19929680228233337, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11615224927663803, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10970190167427063, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1312672197818756, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1206611767411232, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11757097393274307, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10207030922174454, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09696298092603683, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06710699200630188, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05782970413565636, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.055785175412893295, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05531575530767441, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03355114907026291, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028810180723667145, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02834337018430233, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025961248204112053, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025657828897237778, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017690429463982582, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017638811841607094, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016984781250357628, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011826951988041401, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017690429463982582, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017690429463982582, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2539736032485962, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.23284579813480377, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.22506272792816162, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.20106187462806702, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11821615695953369, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.11038093268871307, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13687554001808167, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1244204193353653, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.12051432579755783, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.10354170203208923, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09812406450510025, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06994784623384476, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05969799682497978, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.056894224137067795, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05623549968004227, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03509903699159622, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.029823124408721924, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.029489601030945778, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02695230394601822, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02652719058096409, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01889488473534584, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019016681239008904, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017944084480404854, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01344591286033392, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017944084480404854, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017944084480404854, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10251321643590927, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09095850586891174, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08418582379817963, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07347791641950607, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0476890467107296, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04206385836005211, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06001431867480278, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05453228950500488, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04876960441470146, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.039684098213911057, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03776253014802933, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.030528796836733818, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.026145445182919502, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02309877797961235, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.022333431988954544, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015304175205528736, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012253542430698872, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011690858751535416, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010706007480621338, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010206060484051704, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008114837110042572, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008306683041155338, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007040070835500956, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005855225957930088, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015304175205528736, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015304175205528736, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09007672965526581, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07934901118278503, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07225529849529266, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06290510296821594, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04140280932188034, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.035808488726615906, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05390022322535515, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04897794499993324, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04242878034710884, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03437339514493942, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.032871346920728683, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.027335908263921738, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.023418700322508812, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.020003169775009155, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019140558317303658, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01364265289157629, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01051180250942707, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009891048073768616, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009142697788774967, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008555310778319836, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00718203280121088, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0071470774710178375, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0059919641353189945, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004794563632458448, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01364265289157629, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01364265289157629, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2199394851922989, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19490991532802582, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18525740504264832, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16041329503059387, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10253296792507172, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09263452887535095, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12230805307626724, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10999707877635956, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10466956347227097, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0844690129160881, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07891146093606949, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06242338940501213, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05265549197793007, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04919181764125824, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.048347942531108856, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.031216895207762718, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.025208037346601486, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.024426519870758057, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021499445661902428, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020924022421240807, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016114305704832077, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015467523597180843, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014566856436431408, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00982658565044403, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016114305704832077, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016114305704832077, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.22014017403125763, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19829216599464417, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18318356573581696, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1616823673248291, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10312635451555252, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09129630029201508, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13462576270103455, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12043818831443787, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10537610203027725, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08786502480506897, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08491935580968857, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06976598501205444, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0582738034427166, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.050360921770334244, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04832790419459343, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03506753221154213, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027184970676898956, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.025853941217064857, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02428032085299492, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022998763248324394, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.019147394225001335, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.019186528399586678, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016481898725032806, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014028029516339302, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016481898725032806, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016481898725032806, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17937786877155304, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16602420806884766, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16079813241958618, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14458447694778442, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08485747873783112, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07971276342868805, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09715434908866882, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08904703706502914, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08600980043411255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07441417872905731, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07090654969215393, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.049824804067611694, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.042890846729278564, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.0410132110118866, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04056106507778168, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02501298487186432, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.021671710535883904, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.021284285932779312, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019639965146780014, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.019359732046723366, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013556634075939655, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014017230831086636, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012914071790874004, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010190070606768131, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013556634075939655, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013556634075939655, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24674120545387268, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22855734825134277, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22191990911960602, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19949038326740265, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11662435531616211, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10986021161079407, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13246846199035645, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12155988067388535, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11810747534036636, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10230927169322968, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0971817746758461, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06746632605791092, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05825050547719002, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05601394921541214, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05547850951552391, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03376059979200363, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028927747160196304, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02841903269290924, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02602510340511799, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025684017688035965, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017700016498565674, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01772347465157509, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016909020021557808, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011839808896183968, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017700016498565674, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017700016498565674, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.25630995631217957, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.23485052585601807, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.22690990567207336, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.2026367485523224, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11941250413656235, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.11138380318880081, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13847289979457855, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1258116215467453, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.12180911004543304, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.10449561476707458, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09903623908758163, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.07074017822742462, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.06043412536382675, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.057511989027261734, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.056816622614860535, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03552712872624397, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.03017270751297474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02983158826828003, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.027254808694124222, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02681434154510498, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.019040072336792946, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019280800595879555, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.018025079742074013, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013666093349456787, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.018025079742074013, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.018025079742074013, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10756581276655197, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09503040462732315, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08669983595609665, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07583911716938019, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.049946654587984085, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0432819165289402, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06596408784389496, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05885933339595795, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05120573937892914, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0416385754942894, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.040248803794384, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03378632292151451, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028355829417705536, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024280086159706116, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.023238008841872215, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017053473740816116, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013046027161180973, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012342341244220734, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011479336768388748, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010818707756698132, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009183565154671669, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009180533699691296, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007668419275432825, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006609884090721607, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017053473740816116, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017053473740816116, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09675110876560211, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08492185175418854, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07689526677131653, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06698526442050934, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04442550987005234, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0381629578769207, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05875566974282265, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05298074707388878, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0456521213054657, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03681961074471474, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03536564111709595, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.029882198199629784, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.025366254150867462, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02154988795518875, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02057727426290512, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015020979568362236, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01141489390283823, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010734356939792633, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009941953234374523, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009288580156862736, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007901577278971672, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00790371373295784, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006516528315842152, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00543170515447855, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015020979568362236, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015020979568362236, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2155134528875351, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19270209968090057, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1830819696187973, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1599261462688446, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10112689435482025, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09170111268758774, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12105651199817657, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10905884951353073, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10315807908773422, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08425386250019073, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07943118363618851, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.061847541481256485, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0522153377532959, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04855697602033615, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04768754169344902, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.030932258814573288, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024873720481991768, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02408432774245739, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021404199302196503, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020810553804039955, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015990370884537697, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015272960998117924, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014483978971838951, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009627300314605236, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015990370884537697, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015990370884537697, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23307719826698303, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20769855380058289, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19747790694236755, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.17398306727409363, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10877759754657745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09859643131494522, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12931248545646667, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11689974367618561, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.1110963299870491, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.091249018907547, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08633767068386078, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06668299436569214, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05648941546678543, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.052793338894844055, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05190660059452057, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03350450471043587, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.028131674975156784, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.027344025671482086, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024734988808631897, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.024156421422958374, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018204117193818092, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018709445372223854, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017015445977449417, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01376375276595354, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018204117193818092, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018204117193818092, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1819170117378235, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16816838085651398, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16295835375785828, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1464349329471588, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0861954465508461, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08094809204339981, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0984942689538002, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09010468423366547, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08736322075128555, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07542907446622849, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07191680371761322, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05048713833093643, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.043515894562006, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.041756536811590195, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.041341230273246765, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0254228375852108, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02223174273967743, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.021857479587197304, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020146377384662628, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.019893277436494827, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013960285112261772, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014565996825695038, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.013385646976530552, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010836116969585419, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013960285112261772, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013960285112261772, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.25467923283576965, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2357347309589386, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22904491424560547, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20565634965896606, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12069472670555115, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11366801708936691, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13625623285770416, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12529610097408295, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12223730981349945, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1055426150560379, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10019499063491821, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06963714957237244, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06009865552186966, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05801374092698097, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.057519007474184036, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.034832075238227844, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.030050618574023247, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.029555313289165497, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02700466476380825, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.026683321222662926, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018368592485785484, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018517253920435905, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017646702006459236, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012579442001879215, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018368592485785484, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018368592485785484, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2561657726764679, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2334875613451004, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.22413267195224762, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.19983284175395966, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11895143985748291, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10995155572891235, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.14012619853019714, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1272621601819992, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.12150095403194427, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.10355961322784424, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09819432348012924, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.07134014368057251, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.06108488142490387, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05736236646771431, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05646110326051712, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.035922471433877945, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.030246375128626823, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.029786715283989906, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02726571261882782, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.026689674705266953, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01941259764134884, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019659876823425293, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.018146207556128502, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.014035098254680634, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.018146207556128502, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.018146207556128502, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10856033116579056, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09723477065563202, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09114652127027512, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.079771026968956, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05065513029694557, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04540663957595825, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06228664889931679, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.056616973131895065, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05162779986858368, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.042468342930078506, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.040340356528759, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03171360120177269, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.027085967361927032, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024410801008343697, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.023752400651574135, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015868252143263817, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012736263684928417, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012232140637934208, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01111038401722908, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010671409778296947, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00831450056284666, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00828729197382927, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007321038283407688, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005584612023085356, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015868252143263817, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015868252143263817, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09354843944311142, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0837201252579689, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07740379869937897, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06777328252792358, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04334899038076401, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03831539303064346, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05501130223274231, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.050040103495121, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04424150288105011, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03647653013467789, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0347534604370594, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.027849910780787468, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02391057461500168, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.020906437188386917, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.020138926804065704, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013931944966316223, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010884453542530537, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010335544124245644, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0095148840919137, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00901296641677618, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007269180379807949, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007194042205810547, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006221086252480745, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004734423942863941, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013931944966316223, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013931944966316223, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2326166033744812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20834095776081085, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19913803040981293, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17386066913604736, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10898639261722565, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09947199374437332, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12864388525485992, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11571063101291656, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11096153408288956, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09075284004211426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08514827489852905, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06557095050811768, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05529499426484108, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05223299562931061, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05146360024809837, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03274011239409447, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02660699561238289, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025882408022880554, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022825604304671288, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022326340898871422, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016808316111564636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015965761616826057, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015357257798314095, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009846358560025692, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016808316111564636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016808316111564636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2472359985113144, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2268836796283722, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.21837280690670013, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.19525733590126038, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11556445807218552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10740011930465698, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13523642718791962, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12334858626127243, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11741403490304947, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.10037878900766373, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.09648613631725311, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06950689107179642, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05944724380970001, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05591270327568054, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05505038797855377, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.034866299480199814, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02962855063378811, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02895599976181984, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02667563036084175, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.026144398376345634, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018780168145895004, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01946381665766239, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017639771103858948, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014184211380779743, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017639771103858948, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017639771103858948, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1860954314470291, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17208607494831085, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1665610820055008, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1497310847043991, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08824615180492401, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08279301971197128, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10097075253725052, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09260337054729462, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08942826092243195, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07727259397506714, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07359647750854492, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05181366577744484, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.044638413935899734, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04266050085425377, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04219159111380577, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.025991316884756088, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02257213182747364, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.022164786234498024, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020439645275473595, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.020149080082774162, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014023805037140846, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014651571400463581, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01334997545927763, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01069763395935297, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014023805037140846, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014023805037140846, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.25922641158103943, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24018806219100952, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2332906275987625, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20968149602413177, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12289728969335556, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11574692279100418, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1392160952091217, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12781864404678345, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12444007396697998, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10760580748319626, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10220622271299362, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07122201472520828, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06131135672330856, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05907886102795601, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05854906514286995, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.035671621561050415, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.03057239018380642, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.030055606737732887, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02748807705938816, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.027141926810145378, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018909204751253128, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018814794719219208, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018136851489543915, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012705812230706215, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018136851489543915, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018136851489543915, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2659390866756439, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2412024289369583, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.23189127445220947, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.20627646148204803, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.12356377393007278, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.11410389840602875, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.14440257847309113, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1311933547258377, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.12640532851219177, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.10687370598316193, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.10131599009037018, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.07385652512311935, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0629817321896553, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.059462957084178925, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05861424654722214, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03710780665278435, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.031117122620344162, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.03071054257452488, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02784210443496704, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02730606496334076, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.019935782998800278, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019873058423399925, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.018737586215138435, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013909975998103619, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013909975998103619, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013909975998103619, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1118198111653328, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10043176263570786, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09291597455739975, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08187340199947357, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05217408388853073, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04622916132211685, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06663930416107178, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06022260710597038, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.053253043442964554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04414952173829079, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04238171502947807, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03391347825527191, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028833061456680298, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025245875120162964, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02433636225759983, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01698877662420273, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013338646851480007, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012693489901721478, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011771872639656067, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011185066774487495, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008965443819761276, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009027015417814255, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007656850852072239, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00626298738643527, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01698877662420273, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01698877662420273, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09462100267410278, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08539774268865585, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07686615735292435, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06779906898736954, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.043823208659887314, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03776475414633751, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05933225899934769, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05380752310156822, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04473302885890007, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.037512775510549545, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0362078994512558, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.029998473823070526, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02571491152048111, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.021190356463193893, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.0200145673006773, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015003571286797523, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011162104085087776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010392822325229645, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009934436529874802, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009178025647997856, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007874136790633202, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00778051745146513, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006365668028593063, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00518300523981452, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015003571286797523, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015003571286797523, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.24370138347148895, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21893028914928436, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20866331458091736, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.18275323510169983, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11433172225952148, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10421593487262726, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13753741979599, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12269540876150131, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1163673922419548, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09593377262353897, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0904921293258667, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07054346799850464, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05879318341612816, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.054925933480262756, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.053996775299310684, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03532160446047783, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.028231775388121605, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027390293776988983, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02450079284608364, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.023875348269939423, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018243372440338135, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017459014430642128, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01637289859354496, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011272086761891842, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018243372440338135, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018243372440338135, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2629941403865814, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.23109665513038635, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.21835288405418396, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.19029979407787323, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.12216565757989883, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.11061395704746246, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1466381847858429, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.13265874981880188, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.1256774216890335, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.10008446872234344, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.09597723186016083, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0754491314291954, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.06404776871204376, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.059203073382377625, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05802210420370102, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.037847328931093216, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.03149593994021416, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.030602317303419113, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02723541110754013, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02647543139755726, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.02033299021422863, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.020966732874512672, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.018811866641044617, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.015278504230082035, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.015278504230082035, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.015278504230082035, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17282409965991974, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15992365777492523, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15428505837917328, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13913002610206604, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08201289176940918, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07672587782144547, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09496784210205078, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08700470626354218, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08314818888902664, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07207627594470978, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06876896321773529, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04874405637383461, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.042022597044706345, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03978228569030762, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0392424501478672, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024471282958984375, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02122599631547928, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02079358510673046, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01931295357644558, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01898372918367386, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01327972486615181, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014062322676181793, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012517531402409077, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010458838194608688, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01327972486615181, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01327972486615181, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2524537146091461, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23444977402687073, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2275080680847168, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20498758554458618, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11991260200738907, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11294816434383392, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13680744171142578, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12542195618152618, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12140700221061707, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10546477884054184, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10042760521173477, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07008533179759979, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06027299165725708, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05777372792363167, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05718211829662323, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.035113658756017685, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.030131513252854347, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02959883026778698, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02725275047123432, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.026884466409683228, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018779540434479713, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018930871039628983, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01791466400027275, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013167833909392357, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01791466400027275, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01791466400027275, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.25265103578567505, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22840166091918945, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.218732550740242, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.19461461901664734, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11718171089887619, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10767538845539093, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13899004459381104, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12540601193904877, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11996474862098694, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.10118256509304047, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09620462357997894, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.07116477191448212, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.06048715114593506, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.056767795234918594, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05574151873588562, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03617801517248154, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.03017761930823326, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02965926192700863, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.027122337371110916, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02647498808801174, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.019909074530005455, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.020010214298963547, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.018450329080224037, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.014595062471926212, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.014595062471926212, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.014595062471926212, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12453041970729828, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11280345171689987, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10506510734558105, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09296531230211258, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05833573266863823, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.05216127634048462, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07428622990846634, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06675713509321213, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05941038578748703, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04991642385721207, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04794912412762642, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.038019608706235886, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03211364522576332, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.028319964185357094, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.027359575033187866, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0191383995115757, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015107371844351292, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.014446667395532131, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013487156480550766, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012889278121292591, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010228636674582958, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.010349234566092491, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008801702409982681, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0074235862120985985, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015107371844351292, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015107371844351292, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09892469644546509, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0896148607134819, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0802646055817604, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07119926810264587, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.045758768916130066, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03921961784362793, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06333990395069122, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.057021889835596085, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0467100515961647, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03958785906434059, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0386587455868721, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03221098333597183, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02730938233435154, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.022189389914274216, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.020795689895749092, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01613456755876541, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011707260273396969, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010835932567715645, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010501728393137455, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009621135890483856, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00845992099493742, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008277318440377712, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006674356758594513, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005507301539182663, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01613456755876541, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01613456755876541, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.26220405101776123, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.23939259350299835, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.22947363555431366, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.20444823801517487, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.12407263368368149, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.11446104198694229, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1470392644405365, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13275018334388733, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.12618684768676758, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10681618750095367, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.10145766288042068, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07522578537464142, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06365425884723663, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05957839637994766, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05859600752592087, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03759932145476341, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.030522173270583153, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.029695812612771988, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.026949092745780945, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.026297643780708313, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019387247040867805, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01863904483616352, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017719965428113937, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01174984686076641, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017719965428113937, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017719965428113937, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23613296449184418, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21537315845489502, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20725467801094055, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1790861189365387, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11013735830783844, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10230501741170883, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12863367795944214, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11670658737421036, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11187354475259781, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09344859421253204, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08618009090423584, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06605451554059982, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05606642737984657, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.053067274391651154, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05233700945973396, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03306156024336815, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027716604992747307, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02708221785724163, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024367228150367737, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02390700951218605, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017582520842552185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01760173588991165, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01659250259399414, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01224356796592474, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017582520842552185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017582520842552185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16926322877407074, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15695980191230774, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15172874927520752, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13691776990890503, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08025319129228592, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07529567182064056, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09258836507797241, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08488607406616211, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0813838317990303, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07079599052667618, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06761938333511353, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04748952388763428, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04101638123393059, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03893841430544853, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.038436442613601685, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023900778964161873, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020813127979636192, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020418178290128708, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01899139955639839, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01869119703769684, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013073690235614777, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013804355636239052, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0123807517811656, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01032190304249525, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013073690235614777, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013073690235614777, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2552069127559662, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23775409162044525, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2311769276857376, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20850802958011627, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12100010365247726, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11436723917722702, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13733580708503723, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12616488337516785, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12247715890407562, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10695971548557281, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10182954370975494, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07016097009181976, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06049126759171486, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05815287306904793, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05759961158037186, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0350845567882061, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.030018506571650505, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.029509494081139565, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027159210294485092, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.026804793626070023, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01850663125514984, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01837761700153351, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017695017158985138, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012259121984243393, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01837761700153351, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01837761700153351, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.25242406129837036, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22830134630203247, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.21883299946784973, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.19503363966941833, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11675243824720383, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10735779255628586, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13708943128585815, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12452302873134613, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1194852814078331, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.10080965608358383, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09562256932258606, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0697849914431572, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.059733010828495026, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.056212715804576874, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.055359721183776855, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03511078655719757, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.029499635100364685, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.029059650376439095, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.026377197355031967, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.025829296559095383, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.019000038504600525, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018978092819452286, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017815351486206055, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013372780755162239, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017815351486206055, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017815351486206055, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12232991307973862, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11152344942092896, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10490922629833221, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09308277815580368, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05731010064482689, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.05191183090209961, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07066857069730759, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06434496492147446, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0582253634929657, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04927409067749977, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.047059476375579834, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03597982972860336, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030794430524110794, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02761535346508026, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.026828182861208916, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018035244196653366, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01442944910377264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01386276539415121, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012844109907746315, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012337305583059788, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00949500035494566, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009452197700738907, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008343594148755074, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0063985493034124374, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018035244196653366, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018035244196653366, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10177302360534668, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0929185226559639, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08450174331665039, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07518802583217621, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04710977151989937, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04124670848250389, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.062545046210289, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.057323843240737915, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04800018295645714, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.040966883301734924, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03955337405204773, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.031612660735845566, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027247482910752296, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02273002825677395, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021546591073274612, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01582549326121807, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01186725776642561, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011085203848779202, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010652950033545494, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009876463562250137, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008270843885838985, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008102615363895893, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006721684243530035, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005225792992860079, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01582549326121807, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01582549326121807, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2674533724784851, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.24623076617717743, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.23760567605495453, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.21278107166290283, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.12655188143253326, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.11795024573802948, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14856567978858948, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13388419151306152, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1285267323255539, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10997940599918365, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.10487513244152069, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07605404406785965, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06408683955669403, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.060707032680511475, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05987444892525673, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.038005534559488297, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.03091425634920597, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.030192377045750618, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.027443403378129005, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02692214399576187, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019525829702615738, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01848893240094185, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017973048612475395, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011356954462826252, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017973048612475395, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017973048612475395, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.24361175298690796, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.22196638584136963, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.2140234112739563, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.18967820703983307, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11374222487211227, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10555750876665115, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1314598172903061, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11963523924350739, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11547969281673431, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09710785001516342, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.09228704124689102, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06751944869756699, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05787168815732002, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05505042150616646, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.054391250014305115, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03406061604619026, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02931009978055954, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.028687430545687675, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02610301971435547, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.025675108656287193, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018693383783102036, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.019286883994936943, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01781204529106617, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014300496317446232, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01781204529106617, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01781204529106617, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16002513468265533, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1486503779888153, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14350679516792297, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12956838309764862, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07580342888832092, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07108186930418015, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.087849460542202, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08060386031866074, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07685648649930954, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06705021858215332, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06401360034942627, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04500940069556236, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03888970613479614, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03673962503671646, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03623026981949806, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02261560596525669, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01956087350845337, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019158096984028816, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017867926508188248, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01755332387983799, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01224233116954565, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01292012445628643, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011515462771058083, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009545203298330307, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017867926508188248, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017867926508188248, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24786601960659027, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23144397139549255, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22491934895515442, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20308025181293488, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11745322495698929, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11108183115720749, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13346973061561584, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12279126048088074, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11886286735534668, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1041208878159523, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09911802411079407, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06815410405397415, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05883682891726494, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05643888935446739, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.055866487324237823, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.034068040549755096, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.029100801795721054, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02859586849808693, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026379844173789024, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.0260138101875782, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01790608838200569, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017802702262997627, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017077740281820297, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01180400513112545, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01790608838200569, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01790608838200569, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24480877816677094, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21971623599529266, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20952272415161133, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.186569482088089, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11274166405200958, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10277875512838364, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13348492980003357, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12168774753808975, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11575665324926376, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09677677601575851, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09186200797557831, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0681590661406517, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05841660872101784, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.054345402866601944, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05333789438009262, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03435517102479935, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028597436845302582, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.028101269155740738, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025489550083875656, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02484857477247715, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0185617133975029, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018620681017637253, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017179710790514946, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013171756640076637, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017179710790514946, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017179710790514946, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10259293764829636, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09343580156564713, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08664572983980179, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07749181985855103, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04782811924815178, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04269088804721832, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06139832362532616, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05574490502476692, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04879670962691307, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04153456911444664, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04018041491508484, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03127007558941841, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0266849584877491, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.023124894127249718, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.022214826196432114, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01565924845635891, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012183837592601776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011583397164940834, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010958605445921421, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010382324457168579, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00821791123598814, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008256004191935062, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.006925125606358051, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005646048579365015, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01565924845635891, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01565924845635891, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09104511886835098, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08257262408733368, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07435363531112671, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06663218140602112, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04195841774344444, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.036298543214797974, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05718123912811279, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.051964789628982544, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.042938780039548874, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03657032921910286, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03556284308433533, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02883046679198742, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.024721994996070862, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02031012251973152, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019137758761644363, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014435578137636185, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010710347443819046, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009951566345989704, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009656735695898533, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008902371861040592, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007542109116911888, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007502399850636721, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006000945344567299, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004978481214493513, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014435578137636185, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014435578137636185, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22266948223114014, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19949810206890106, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1870187222957611, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16786633431911469, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1031167209148407, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09224091470241547, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13088099658489227, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11658143252134323, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10587102174758911, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08872375637292862, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08528300374746323, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.066837377846241, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05569849908351898, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04953063279390335, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.048010461032390594, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.033419497311115265, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.025521183386445045, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.024387987330555916, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02261812798678875, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02160884067416191, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017179936170578003, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016325371339917183, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014540164731442928, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010344380512833595, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017179936170578003, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017179936170578003, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2436041235923767, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21691758930683136, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20647403597831726, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.18102799355983734, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11357025057077408, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10260023921728134, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13440921902656555, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1220003217458725, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11608190089464188, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09507795423269272, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08911310136318207, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0691438838839531, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.058941800147295, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05509210005402565, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05416528135538101, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03472194820642471, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.029386892914772034, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02855939045548439, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02582382783293724, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0252276211977005, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01883408986032009, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01957804150879383, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01759224385023117, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014455562457442284, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01759224385023117, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01759224385023117, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16373838484287262, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1518310308456421, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.146576926112175, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13251645863056183, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07748807966709137, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07261761277914047, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08969124406576157, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08231326192617416, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07858408242464066, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06850694864988327, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06543165445327759, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.045898325741291046, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.039670780301094055, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.037511177361011505, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03698970004916191, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023076215758919716, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01990697719156742, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01950002647936344, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018170872703194618, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017852483317255974, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012507004663348198, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01306240539997816, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011774079874157906, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009558048099279404, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018170872703194618, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018170872703194618, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.25406524538993835, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23711314797401428, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23050522804260254, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20819002389907837, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12046076357364655, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11394312232732773, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13695994019508362, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12581993639469147, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12188442796468735, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10680735856294632, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10174678266048431, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07001622766256332, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06037329137325287, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05796283856034279, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05738489702343941, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.035130150616168976, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.030101710930466652, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02959279716014862, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027364004403352737, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02700056880712509, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018736233934760094, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018718523904681206, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017903868108987808, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01280753593891859, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017903868108987808, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017903868108987808, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.25680509209632874, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22832468152046204, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.21699100732803345, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1923125833272934, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11821797490119934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10698547214269638, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.14131563901901245, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12778699398040771, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.12174387276172638, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.1004294902086258, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09520073235034943, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.07210774719715118, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.061426423490047455, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.0570901557803154, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05601733550429344, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03637564554810524, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.030191050842404366, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02967279963195324, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.026761630550026894, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02606433443725109, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.019835809245705605, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019869046285748482, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01833040453493595, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.014259071089327335, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01833040453493595, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01833040453493595, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11812145262956619, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10814312845468521, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10087931156158447, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09013359993696213, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05535230040550232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04981767013669014, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07068891823291779, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06389325112104416, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05637446790933609, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.048261187970638275, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04648272693157196, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03610346093773842, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030754879117012024, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026920337229967117, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0259417537599802, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018168270587921143, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014407516457140446, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01376682985574007, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013048744760453701, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01245016511529684, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009703859686851501, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009976303204894066, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008319279178977013, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0071976506151258945, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018168270587921143, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018168270587921143, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.1018264964222908, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09192298352718353, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08166562765836716, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07343941926956177, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04684010520577431, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.039830099791288376, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06494524329900742, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05931587889790535, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.048037633299827576, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04084542393684387, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.039952803403139114, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03286575898528099, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02827572263777256, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.022687789052724838, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021163932979106903, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01645396463572979, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011947906576097012, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010987565852701664, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01078158151358366, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009815494529902935, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008580641821026802, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008476652204990387, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0066880760714411736, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005542671773582697, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01645396463572979, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01645396463572979, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.24079838395118713, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21651509404182434, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20354288816452026, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.18344652652740479, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1125483587384224, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10098642110824585, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1416015475988388, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12586982548236847, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11539565771818161, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.096826933324337, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0934443548321724, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07274746149778366, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06028168275952339, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05412468686699867, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05259174481034279, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03646688908338547, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.027856692671775818, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.026688771322369576, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02464216575026512, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.023632705211639404, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0188397578895092, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017656199634075165, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01611950621008873, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011160336434841156, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017656199634075165, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017656199634075165, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.21998141705989838, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1975252330303192, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18923082947731018, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16342703998088837, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10167599469423294, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0932718887925148, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11927667260169983, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10815239697694778, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.1032164990901947, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08467892557382584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08013642579317093, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.061177823692560196, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05239993333816528, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04940483719110489, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04873480647802353, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03088865987956524, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.026623865589499474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02601448819041252, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023412402719259262, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02294732630252838, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01706116460263729, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01799994334578514, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01609625108540058, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013675717636942863, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01706116460263729, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01706116460263729, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1531023532152176, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14173994958400726, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1364537626504898, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1233060285449028, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07234037667512894, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06755431741476059, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0841408520936966, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07733746618032455, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07344791293144226, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0638648122549057, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0609908290207386, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04307438060641289, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03723149746656418, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03498980402946472, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03445389121770859, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021598659455776215, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018500911071896553, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018084928393363953, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016846025362610817, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.0165186058729887, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011615091934800148, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01208503358066082, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010858421213924885, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008726552128791809, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018084928393363953, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018084928393363953, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.241622194647789, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22490057349205017, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2181769162416458, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1971970647573471, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11433911323547363, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10779046267271042, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13066735863685608, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12004093080759048, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11580994725227356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1012987270951271, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09649849683046341, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06674075126647949, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05759645625948906, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05501938983798027, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.054406870156526566, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.033447057008743286, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02854888327419758, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028019949793815613, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025921475142240524, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025535451248288155, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017773538827896118, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.0177562665194273, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01688445545732975, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012077049352228642, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017773538827896118, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017773538827896118, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2474973499774933, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21953889727592468, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20759880542755127, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1843899041414261, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11358048766851425, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10224789381027222, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1376146376132965, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12446264177560806, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11717148125171661, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09679125994443893, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09209208190441132, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.07034796476364136, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.059976112097501755, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05499841645359993, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.0537615604698658, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.035640109330415726, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.029368000105023384, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02875399962067604, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.026174310594797134, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02537454292178154, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01964334025979042, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019793732091784477, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017943555489182472, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.014478132128715515, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017943555489182472, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017943555489182472, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1215902715921402, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11157333850860596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10398334264755249, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09343467652797699, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05696148797869682, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.051225610077381134, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07279404997825623, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06609713286161423, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05805272236466408, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.05000651255249977, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04830809682607651, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0371992290019989, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03174358233809471, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02759575843811035, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02655154839158058, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01865302585065365, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014640786685049534, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013962429016828537, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013292906805872917, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012640909291803837, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009927244856953621, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.010013247840106487, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008470961824059486, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.007010976783931255, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014640786685049534, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014640786685049534, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10095378756523132, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09206664562225342, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08143416047096252, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07351852208375931, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04653092473745346, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.039522696286439896, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06548039615154266, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.059932831674814224, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0475933663547039, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04100368916988373, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04023882746696472, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03322622552514076, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.028431303799152374, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.022587405517697334, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.020959923043847084, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016645114868879318, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011923779733479023, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010926437564194202, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010844528675079346, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009836562909185886, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008736414834856987, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008537798188626766, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006723084487020969, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005609618034213781, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016645114868879318, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016645114868879318, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2551848888397217, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2313653528690338, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21962237358093262, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1979016214609146, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11964058130979538, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10889001190662384, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1469198614358902, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13131216168403625, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1222364604473114, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.1035669818520546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09955517947673798, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07524482160806656, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06287908554077148, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05742365121841431, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.056092776358127594, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03764404356479645, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02937566675245762, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.028308628126978874, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02606627717614174, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02516254410147667, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019285812973976135, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018183821812272072, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01682317815721035, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011206353083252907, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018183821812272072, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018183821812272072, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2222689837217331, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20331449806690216, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1957187056541443, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.17367342114448547, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10337335616350174, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09615123271942139, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12041541188955307, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10890718549489975, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10478329658508301, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08873052895069122, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08463096618652344, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.061903875321149826, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05295303836464882, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05051816254854202, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04993738606572151, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031280312687158585, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02746850810945034, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02696816436946392, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024705125018954277, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02435249835252762, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017413467168807983, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018795769661664963, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01664375141263008, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014665905386209488, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017413467168807983, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017413467168807983, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15098656713962555, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1399936079978943, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13497760891914368, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12195736169815063, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07132705301046371, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06676565110683441, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08259214460849762, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07598419487476349, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07236377149820328, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06303049623966217, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06017815321683884, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04215418919920921, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03654143214225769, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.034461840987205505, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03395681828260422, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02117328904569149, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01815648190677166, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.017759498208761215, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01652565598487854, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016218245029449463, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011365113779902458, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011756538413465023, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010662897489964962, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0084141306579113, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01815648190677166, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01815648190677166, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23764099180698395, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22151567041873932, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21492378413677216, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19435037672519684, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11257174611091614, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10626795142889023, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1284242421388626, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11811445653438568, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11401275545358658, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09984873235225677, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09526508301496506, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06560240685939789, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05669926479458809, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.054208412766456604, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.053622324019670486, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03289658576250076, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028268909081816673, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027768708765506744, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025725768879055977, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025361234322190285, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017617490142583847, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017772426828742027, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016781864687800407, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012337998487055302, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017617490142583847, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017617490142583847, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23631420731544495, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20959995687007904, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19749370217323303, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17460310459136963, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10851486772298813, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09729178249835968, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.132106214761734, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11967417597770691, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1117868646979332, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09210342913866043, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08741147816181183, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06757188588380814, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05776281654834747, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.052538275718688965, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05126303434371948, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.034272462129592896, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.0281085055321455, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027441974729299545, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024987438693642616, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024143122136592865, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01907033659517765, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019046016037464142, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01734377071261406, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01392062846571207, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01734377071261406, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01734377071261406, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12154015153646469, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1111690029501915, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1022082194685936, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0923035740852356, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0567491240799427, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.05018014460802078, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07528066635131836, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0677420124411583, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05793808400630951, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04990820959210396, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04861769080162048, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03845732659101486, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03245987743139267, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.027547823265194893, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.026268282905220985, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.019300641492009163, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014680684544146061, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013863794505596161, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013362477533519268, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012576279230415821, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010282772593200207, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.010251819156110287, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008484535850584507, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.007181599270552397, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014680684544146061, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014680684544146061, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10384104400873184, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09376410394906998, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08104054629802704, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07337597757577896, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04763659089803696, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.039211444556713104, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0700908750295639, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0634426549077034, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.048952676355838776, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.041847843676805496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04139510542154312, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03541162982583046, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.030135486274957657, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.023131605237722397, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02117953449487686, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01770985871553421, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012300942093133926, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011074867099523544, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011184806004166603, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009938661940395832, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.009283123537898064, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009064090438187122, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00682488176971674, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005912240128964186, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01770985871553421, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01770985871553421, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2535170912742615, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22645866870880127, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2127530872821808, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.19119396805763245, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1176762804389, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.105263851583004, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14780175685882568, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13174104690551758, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.12082815170288086, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.1008373573422432, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09723006188869476, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07607948780059814, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06304434686899185, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05655822902917862, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.054939690977334976, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.038106780499219894, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.029018394649028778, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027776643633842468, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025550110265612602, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.024482492357492447, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019564485177397728, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018318327143788338, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01658572070300579, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011399580165743828, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018318327143788338, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018318327143788338, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2438829392194748, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2121388018131256, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20085567235946655, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.17616015672683716, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11353733390569687, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10090838372707367, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13251589238643646, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12077277898788452, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11600583046674728, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09213324636220932, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08624576032161713, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0682399570941925, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.058448389172554016, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05517629161477089, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.054387304931879044, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.034360963851213455, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.029594533145427704, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02879325859248638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.025502052158117294, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.024977046996355057, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018831763416528702, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.019803661853075027, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017796702682971954, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014878991059958935, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017796702682971954, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017796702682971954, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15736661851406097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14655546844005585, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14190207421779633, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12827017903327942, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07446831464767456, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07013717293739319, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08514605462551117, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07856608182191849, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07540865242481232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06599202752113342, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06289084255695343, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04344705119729042, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03771847486495972, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03588557988405228, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.035452235490083694, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021755153313279152, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.0187715794891119, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018411055207252502, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017093727365136147, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016821695491671562, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011577042751014233, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01192514132708311, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010969852097332478, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008359862491488457, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018411055207252502, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018411055207252502, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24910962581634521, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2326180636882782, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22634483873844147, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2046263962984085, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11788083612918854, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11168920248746872, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13340391218662262, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12285911291837692, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1192731112241745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10467713326215744, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09965712577104568, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06797321140766144, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05884068086743355, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.056608669459819794, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05607222765684128, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03406020253896713, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02916717156767845, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028689153492450714, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026484938338398933, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.026148313656449318, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018015732988715172, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017783479765057564, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01726548746228218, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011763599701225758, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018015732988715172, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018015732988715172, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23629489541053772, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21056529879570007, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20013034343719482, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17670710384845734, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10851462930440903, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09831567853689194, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12872084975242615, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11740022897720337, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11155714094638824, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09211281687021255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08686667680740356, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06572320312261581, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05633270740509033, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.052238792181015015, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.051236726343631744, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03313787654042244, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027424054220318794, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.026943735778331757, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02422475814819336, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023594843223690987, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01798497699201107, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01779470592737198, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016632117331027985, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012501783668994904, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01798497699201107, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01798497699201107, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11750079691410065, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10653182864189148, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0976954996585846, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08826833218336105, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0546606183052063, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.048081934452056885, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07225271314382553, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06526229530572891, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05601316690444946, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04777644947171211, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04660916328430176, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0369105190038681, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03124917857348919, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026493564248085022, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025270944461226463, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01852668635547161, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014060234650969505, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013261925429105759, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012725144624710083, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011950694024562836, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009879942052066326, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009753313846886158, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008185763843357563, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006737895775586367, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014060234650969505, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014060234650969505, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10417705029249191, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09363364428281784, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0817779153585434, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07405122369527817, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.047977034002542496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03970333933830261, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06952034682035446, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06262099742889404, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04935076832771301, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04198093339800835, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.041548680514097214, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03525085002183914, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.030007347464561462, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.023310532793402672, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021426206454634666, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017623543739318848, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01233664620667696, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011181224137544632, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011178961955010891, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010008998215198517, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00920550525188446, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0089960852637887, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00685169268399477, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005902008153498173, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017623543739318848, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017623543739318848, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.24121993780136108, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21344298124313354, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19783265888690948, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1776648610830307, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11118041723966599, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09761868417263031, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14343911409378052, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12764564156532288, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11457531154155731, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09461501985788345, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09154999256134033, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0739993080496788, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0610743947327137, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05335933715105057, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05144205316901207, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03715486451983452, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.027422353625297546, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0259845107793808, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.024063820019364357, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022763127461075783, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01905488781630993, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017636079341173172, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015589955262839794, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010878989472985268, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017636079341173172, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017636079341173172, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2509031593799591, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.22246861457824707, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.21206606924533844, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.18369655311107635, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11630554497241974, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10561396926641464, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13495543599128723, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12391345202922821, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11871522665023804, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0958736389875412, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08891799300909042, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06923535466194153, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05953112989664078, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05603111907839775, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.055185575038194656, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03474711999297142, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.029396671801805496, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.028606215491890907, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.025368506088852882, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.024811945855617523, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018598215654492378, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018884848803281784, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01739356480538845, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013306112959980965, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01739356480538845, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01739356480538845, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15592285990715027, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14558301866054535, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14127454161643982, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12782888114452362, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07386356592178345, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0697837844491005, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08395036309957504, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07745799422264099, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07474250346422195, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06553570926189423, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06250541657209396, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04284288361668587, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03718538582324982, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.035563305020332336, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.035181619226932526, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021471701562404633, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01854870654642582, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018224123865365982, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016893040388822556, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016654161736369133, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011454545892775059, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.0116755785420537, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01092979684472084, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008122265338897705, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018224123865365982, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018224123865365982, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.25825750827789307, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24169129133224487, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23560602962970734, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.21300801634788513, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12235762923955917, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11622162163257599, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13774561882019043, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12693895399570465, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12373600900173187, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10881341993808746, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10358239710330963, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07029549777507782, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06084255501627922, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05876342952251434, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.058267075568437576, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.035165607929229736, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.030278142541646957, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.029824981465935707, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027526581659913063, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.027208181098103523, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018605316057801247, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01842542178928852, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0179036483168602, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012239549309015274, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0179036483168602, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0179036483168602, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23660250008106232, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21129672229290009, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20090588927268982, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17775359749794006, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10846579819917679, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09835038334131241, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12839940190315247, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11740760505199432, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1114267110824585, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09234350919723511, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0870920792222023, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06509343534708023, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05617076903581619, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.052101898938417435, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05110681429505348, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03275013342499733, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02715109847486019, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02665591612458229, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023969391360878944, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.0233217254281044, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017550086602568626, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017356138676404953, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016194790601730347, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011841286905109882, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017550086602568626, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017550086602568626, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1172647774219513, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10596885532140732, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09692598134279251, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08759435266256332, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0546264611184597, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.047696322202682495, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07231813669204712, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06539925932884216, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0559384748339653, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04753073677420616, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04646112769842148, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03695562481880188, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.031361766159534454, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026467645540833473, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0252035204321146, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01855369843542576, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014042697846889496, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01319358590990305, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012667692266404629, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01187229622155428, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009876294061541557, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009759442880749702, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008166071027517319, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006726232822984457, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014042697846889496, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014042697846889496, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11193082481622696, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.10053371638059616, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08768881857395172, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07940678298473358, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.051516495645046234, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04269008710980415, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.07416751980781555, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06725619733333588, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.052991244941949844, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04492681473493576, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04437999799847603, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.037609852850437164, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.03205791115760803, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02504740096628666, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02307850681245327, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.018817024305462837, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013314730487763882, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012088087387382984, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.012053036130964756, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01082561258226633, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00989870261400938, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009726828895509243, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007475345395505428, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006461519747972488, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013314730487763882, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013314730487763882, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23666059970855713, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2080051600933075, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1914009302854538, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17181716859340668, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10874587297439575, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09449915587902069, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14217358827590942, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12645703554153442, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11235278844833374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09207092225551605, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0891353040933609, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07278051227331161, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.060380298644304276, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.052236348390579224, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05016034469008446, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03646797314286232, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.026892099529504776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02538171224296093, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023540470749139786, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022149330005049706, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01869165524840355, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017498519271612167, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015199306420981884, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010852296836674213, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017498519271612167, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017498519271612167, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.24111703038215637, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21199779212474823, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20074400305747986, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.17264246940612793, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11257511377334595, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10097339004278183, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13238273561000824, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12078670412302017, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11477836966514587, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09014859795570374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08569163829088211, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06780017167329788, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.058304313570261, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05456126853823662, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05364196375012398, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.034041184931993484, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.029061907902359962, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.028217848390340805, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02476455643773079, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.024163465946912766, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018469449132680893, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01931685581803322, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01727525144815445, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014232066459953785, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01727525144815445, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01727525144815445, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16892515122890472, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15773160755634308, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15315397083759308, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13850310444831848, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08001376688480377, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07560806721448898, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09081641584634781, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08381136506795883, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08095633238554001, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07098782062530518, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06766526401042938, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.046367157250642776, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.040194638073444366, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03847626969218254, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03806827962398529, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023189915344119072, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019972089678049088, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019614769145846367, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01816418580710888, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.0179028008133173, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01229063794016838, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012427919544279575, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01172702107578516, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008472456596791744, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01816418580710888, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01816418580710888, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.26380807161331177, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24677908420562744, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2405896782875061, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.21754789352416992, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12503793835639954, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11875421553850174, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1405937373638153, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1297149360179901, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12640967965126038, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11117105931043625, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10578641295433044, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07172501087188721, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.062104493379592896, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.06000390276312828, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.059492871165275574, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03584595397114754, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.030831821262836456, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.030361460521817207, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027993347495794296, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.027671189978718758, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01883596181869507, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018633771687746048, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01812979392707348, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012205381877720356, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01812979392707348, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01812979392707348, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24346940219402313, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21767950057983398, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20718947052955627, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18359430134296417, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11178974062204361, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10152336210012436, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13230469822883606, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12075873464345932, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11482606828212738, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09529446065425873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09008738398551941, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0671277642250061, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05774647742509842, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.053702548146247864, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.052705466747283936, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.033790018409490585, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027967901900410652, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027469728142023087, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024711016565561295, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024075012654066086, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018227919936180115, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01783195696771145, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016886644065380096, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01216847449541092, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018227919936180115, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018227919936180115, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11864744126796722, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10772655159235, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09899269044399261, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0895114615559578, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.055274806916713715, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04866761714220047, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07233744859695435, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06572699546813965, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.056526511907577515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04825001582503319, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04693928733468056, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03687764331698418, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.031413860619068146, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02674729935824871, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025554833933711052, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018471289426088333, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014104635454714298, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013292020186781883, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01272412296384573, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011961715295910835, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009786209091544151, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009653509594500065, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008158460259437561, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006538952235132456, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014104635454714298, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014104635454714298, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10772724449634552, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09640684723854065, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08575297892093658, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07748343795537949, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04960105940699577, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04185214638710022, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06871948391199112, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06255614757537842, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0510282889008522, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.043111078441143036, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04223901405930519, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03489825129508972, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02989320643246174, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02399325557053089, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02241460047662258, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01746979169547558, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012616182677447796, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0115896537899971, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011346466839313507, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01033426821231842, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.009165599010884762, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008911305107176304, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007142400369048119, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00579680223017931, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01746979169547558, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01746979169547558, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.24959304928779602, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22372201085090637, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21025995910167694, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.18907251954078674, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11590546369552612, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10384459048509598, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14378692209720612, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12985624372959137, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1188722625374794, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09941983222961426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09557196497917175, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07347778975963593, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06202706694602966, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.055635325610637665, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.054040923714637756, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.036753155291080475, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.028501931577920914, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027305353432893753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025159701704978943, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02409016340970993, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018858950585126877, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017956547439098358, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016186028718948364, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011112923733890057, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017956547439098358, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017956547439098358, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23156222701072693, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20635560154914856, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19725088775157928, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.17044438421726227, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10725664347410202, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09747496247291565, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12423441559076309, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11384524405002594, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10938108712434769, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08842318505048752, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08309900015592575, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06347128748893738, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05468730255961418, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05171012505888939, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05097387358546257, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031774360686540604, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027128292247653008, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026389047503471375, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02341814897954464, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022952474653720856, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016973713412880898, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017403332516551018, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01599809154868126, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012300536967813969, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016973713412880898, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016973713412880898, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17211760580539703, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16068266332149506, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15609127283096313, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14103733003139496, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08155323565006256, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07707739621400833, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09258455783128738, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0853346586227417, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08249407261610031, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07227102667093277, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06892210245132446, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04719848185777664, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.040958479046821594, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03925108164548874, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.038849737495183945, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023674942553043365, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02041751705110073, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020063266158103943, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018564874306321144, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01830952800810337, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012665046378970146, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01276205014437437, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012117241509258747, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008792474865913391, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01830952800810337, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01830952800810337, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2676388621330261, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.250352680683136, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2440071702003479, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.22056488692760468, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.1269797682762146, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.12047086656093597, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1425468921661377, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13165906071662903, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12831130623817444, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11266422271728516, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.1071920096874237, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07270456850528717, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06303176283836365, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.060916684567928314, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06041768938302994, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03634164482355118, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.03129023686051369, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.03081120178103447, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02837732806801796, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.028051909059286118, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019066395238041878, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018883801996707916, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018356984481215477, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012324226088821888, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018356984481215477, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018356984481215477, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2461528331041336, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2180933654308319, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20672409236431122, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18344952166080475, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11262861639261246, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10143385082483292, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13338369131088257, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1223425567150116, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11596603691577911, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09535499662160873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09048084914684296, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06786113232374191, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05847933888435364, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.054076023399829865, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.053000301122665405, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.034097157418727875, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02811414934694767, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027578987181186676, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02469708025455475, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02399625815451145, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018234899267554283, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017917748540639877, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016760027036070824, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012103433720767498, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018234899267554283, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018234899267554283, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12638236582279205, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1150963306427002, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10613509267568588, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09596237540245056, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05901717767119408, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.05230095610022545, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07678792625665665, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06986457854509354, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.06031358987092972, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.051689982414245605, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.05029529333114624, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03915791213512421, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03347606956958771, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02859623171389103, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.027340300381183624, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01961452327668667, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01513894647359848, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.014317191205918789, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013706796802580357, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012918291613459587, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010356101207435131, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01041681133210659, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008644512854516506, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.007175980135798454, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01513894647359848, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01513894647359848, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11823214590549469, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.10705334693193436, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.09492865204811096, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.08598186075687408, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.054720163345336914, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04623837396502495, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.07669137418270111, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06984876096248627, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05610791593790054, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04797263815999031, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04713764041662216, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03895661234855652, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0333685465157032, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.026485584676265717, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.024619460105895996, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.019471045583486557, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013928079046308994, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012738929130136967, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01260630413889885, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01140899583697319, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.010196772404015064, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009902127087116241, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007862133905291557, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006372467149049044, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013928079046308994, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013928079046308994, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2650124132633209, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.23978309333324432, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2277725338935852, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.2050466239452362, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.12383680790662766, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.11267408728599548, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.15045014023780823, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13591310381889343, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.12661579251289368, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10702978074550629, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.10264439880847931, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07672149688005447, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06495873630046844, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05939054116606712, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05804457515478134, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03833873197436333, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.03040272556245327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02933722548186779, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02696406841278076, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.026058197021484375, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019681958481669426, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0188338253647089, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017344173043966293, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011647053062915802, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017344173043966293, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017344173043966293, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23119983077049255, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20917625725269318, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1994708776473999, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16927364468574524, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10801433026790619, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0988948717713356, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12744277715682983, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1165725439786911, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10946083068847656, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0890311747789383, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08301779627799988, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0650126114487648, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05581683665513992, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05192051827907562, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05097154155373573, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032550062984228134, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.026925446465611458, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026139158755540848, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023123180493712425, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02247968502342701, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017215682193636894, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017008740454912186, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015854470431804657, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011368876323103905, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017215682193636894, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017215682193636894, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16858379542827606, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15731441974639893, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15282687544822693, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13802853226661682, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07990846782922745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07552161067724228, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09055803716182709, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08358894288539886, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0808536484837532, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07078175991773605, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06740150600671768, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.046169184148311615, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04008644074201584, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.038423553109169006, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03803585097193718, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023114819079637527, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01992557942867279, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019578896462917328, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018090268597006798, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017840100452303886, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012246466241776943, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012362617999315262, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011705050244927406, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008407206274569035, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018090268597006798, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018090268597006798, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.27048054337501526, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.25301095843315125, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2466760128736496, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2229071408510208, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12834064662456512, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.12181287258863449, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.14434245228767395, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1330147385597229, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1297353059053421, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11389105767011642, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10844779014587402, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07353061437606812, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06373948603868484, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.06162121891975403, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06111441180109978, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03685760498046875, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.031696245074272156, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.031209534034132957, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.028740914538502693, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.028419850394129753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01947302371263504, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.019186805933713913, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01876046694815159, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012628084979951382, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012628084979951382, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012628084979951382, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24330458045005798, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21505846083164215, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20327353477478027, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18083932995796204, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.1110994964838028, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09973928332328796, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13284899294376373, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12156491726636887, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11469660699367523, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09406333416700363, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08954694122076035, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06728013604879379, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.058050088584423065, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05333231762051582, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05216709524393082, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03385242819786072, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027691170573234558, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027112821117043495, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024310220032930374, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023544667288661003, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018137505277991295, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017660386860370636, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016564879566431046, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011799449101090431, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018137505277991295, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018137505277991295, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.13195137679576874, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.12063200771808624, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.11172012239694595, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.10082944482564926, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0617978498339653, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.05494901165366173, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0796690434217453, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.07265070080757141, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.06302818655967712, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0541815422475338, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.052533023059368134, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04058713838458061, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.034861333668231964, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.029847415164113045, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02857278287410736, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02033259905874729, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015651052817702293, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.014812320470809937, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.014148281887173653, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.013345472514629364, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010656406171619892, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.010577620007097721, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008925828151404858, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.007075462490320206, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015651052817702293, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015651052817702293, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11890176683664322, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.10792489349842072, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0963183045387268, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.08722609281539917, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.055167607963085175, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04699113965034485, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.07595247775316238, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06969829648733139, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05648917704820633, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04840976372361183, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04737302288413048, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03855225071310997, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.033321961760520935, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02668941020965576, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.024897733703255653, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.019287673756480217, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013977428898215294, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012853731401264668, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.012649467214941978, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.011532695963978767, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.010041431523859501, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00983707420527935, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0078468918800354, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006327178794890642, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013977428898215294, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013977428898215294, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.267900288105011, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2431424856185913, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2310427725315094, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.2080307900905609, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.12536107003688812, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.11421319097280502, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.15298108756542206, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13748736679553986, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.12798944115638733, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.1086835265159607, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.10403699427843094, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0781254917383194, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06567651033401489, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.06017247959971428, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05879030376672745, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.038984496146440506, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.030756276100873947, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.029655802994966507, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.027295121923089027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.026391413062810898, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.020040808245539665, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018957937136292458, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017657794058322906, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011673064902424812, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017657794058322906, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017657794058322906, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2509867548942566, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.22139711678028107, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.21006259322166443, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.18356503546237946, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11679989844560623, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.1052233949303627, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1374712586402893, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12518441677093506, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11950508505105972, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09548823535442352, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08981726318597794, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.07011570036411285, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.06005963683128357, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.056175459176301956, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05522337183356285, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03522731363773346, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.029196184128522873, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.028270557522773743, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024934696033596992, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.024293486028909683, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01882813684642315, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018473545089364052, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017490357160568237, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01252575684338808, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017490357160568237, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017490357160568237, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.171192929148674, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15964454412460327, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15503011643886566, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13997384905815125, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08119245618581772, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07665702700614929, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09213025867938995, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08498077839612961, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0821499228477478, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07185900211334229, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06836128234863281, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.047043416649103165, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.040790051221847534, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.039073873311281204, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.038659125566482544, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023579750210046768, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02030032128095627, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019942879676818848, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01842595264315605, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018169231712818146, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01258340198546648, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01264896523207426, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012030096724629402, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008665487170219421, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018169231712818146, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018169231712818146, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2723749577999115, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2546529173851013, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.24825093150138855, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2241089940071106, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12935858964920044, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.12275218963623047, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1450955718755722, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13409197330474854, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.13077765703201294, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11464668065309525, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10894584655761719, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07396159321069717, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06420594453811646, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.062050219625234604, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06153666973114014, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03691570460796356, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.03178267180919647, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.031293660402297974, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.028765367344021797, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02843482419848442, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019188379868865013, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01905084401369095, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01845935545861721, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012285558506846428, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012285558506846428, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012285558506846428, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2446848601102829, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21568025648593903, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.2031620293855667, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18133853375911713, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11168579012155533, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09961745142936707, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1341729611158371, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12307113409042358, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11538158357143402, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09458878636360168, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09035948663949966, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06826017796993256, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05885273963212967, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.053656578063964844, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.052366990596055984, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03418345749378204, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027918415144085884, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02726702019572258, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02453864924609661, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02369764819741249, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01812831126153469, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01796327345073223, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01637895777821541, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01206684298813343, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01812831126153469, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01812831126153469, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1246747151017189, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11349353939294815, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10405228286981583, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09381216764450073, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05823856592178345, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.051145266741514206, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07649775594472885, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06984306871891022, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05954011157155037, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.050947405397892, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04959666356444359, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.039029669016599655, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0334809347987175, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0281683262437582, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.026783360168337822, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.019549459218978882, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014807382598519325, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013906031847000122, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013369710184633732, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012495392933487892, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010208581574261189, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01015847735106945, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00834957417100668, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006774343084543943, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014807382598519325, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014807382598519325, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11451546847820282, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.10337290167808533, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.09032676368951797, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.08179035037755966, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.05281636863946915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.043931350111961365, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.07584904879331589, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06904470920562744, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05423083156347275, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04625728353857994, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04562900960445404, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03843110427260399, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.032868288457393646, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.025626450777053833, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.023620212450623512, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.019219450652599335, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013531646691262722, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012279171496629715, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01226157695055008, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010999835096299648, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.010134429670870304, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009798215702176094, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00768751185387373, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006375280674546957, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013531646691262722, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013531646691262722, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.25675907731056213, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2291417419910431, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21388404071331024, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.19217441976070404, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11896057426929474, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10564307868480682, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.15121012926101685, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13495950400829315, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.12228816002607346, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10185474157333374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09836608171463013, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07756491005420685, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06443354487419128, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05712451785802841, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05529102310538292, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03884003683924675, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02928631380200386, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027889031916856766, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02578856609761715, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.024563932791352272, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019962439313530922, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018574047833681107, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016739288344979286, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0114213228225708, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016739288344979286, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016739288344979286, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2537306547164917, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.22568370401859283, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.2156510353088379, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.18517489731311798, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11876077950000763, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10868935286998749, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13653945922851562, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12557832896709442, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.1208539605140686, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09750048816204071, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.09042949229478836, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06982582807540894, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.06014251708984375, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.057030778378248215, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.056270990520715714, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03487011790275574, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02948472835123539, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.028767269104719162, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.025220416486263275, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02470974065363407, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018321942538022995, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0182956475764513, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01730370707809925, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012200537137687206, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018321942538022995, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018321942538022995, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.174404114484787, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16269584000110626, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1580069363117218, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14257392287254333, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08280936628580093, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07818733900785446, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0939369723200798, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08669427782297134, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08378612250089645, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07324900478124619, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06970179080963135, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04794231802225113, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04159959405660629, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03983449935913086, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.039420515298843384, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02398308366537094, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020674318075180054, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02031022682785988, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018753113225102425, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018489234149456024, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01269968319684267, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012853633612394333, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012120204977691174, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008766121231019497, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01269968319684267, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01269968319684267, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.27085521817207336, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.253184974193573, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2467358410358429, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.22262530028820038, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.1287091076374054, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1221214309334755, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1446727216243744, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.13352340459823608, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.13015688955783844, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11406657099723816, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10837963968515396, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0738409087061882, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06397942453622818, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.06179215759038925, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.06127650663256645, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03691772744059563, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.03175583854317665, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.03126388415694237, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.028752325102686882, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02842317707836628, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.019392214715480804, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01918710209429264, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.018661221489310265, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012550906278192997, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012550906278192997, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012550906278192997, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24257540702819824, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21271756291389465, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19948554039001465, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1782866269350052, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11058703809976578, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09798315912485123, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13342449069023132, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12268561869859695, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1144515872001648, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09328745305538177, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08931155502796173, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06795327365398407, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.058765385299921036, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05321480333805084, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.051840707659721375, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03423234447836876, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02783314511179924, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027147551998496056, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024440793320536613, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023544158786535263, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018404649570584297, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01815512217581272, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016605623066425323, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01238467451184988, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018404649570584297, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018404649570584297, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12716662883758545, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11579912155866623, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10767972469329834, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09714864939451218, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.059534501284360886, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.05307710915803909, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07601740211248398, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06906051188707352, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.06078251451253891, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.05200224742293358, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.050303466618061066, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03884371370077133, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03314753994345665, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.028795914724469185, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.027685202658176422, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0194599200040102, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015158573165535927, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.014412800781428814, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01368031557649374, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012979518622159958, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010266978293657303, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.010236125439405441, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008705592714250088, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006984143517911434, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015158573165535927, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015158573165535927, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11409368366003036, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.10401047021150589, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.09544985741376877, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.08610205352306366, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.053160447627305984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0468151830136776, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06981276720762253, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06361313909292221, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05430356413125992, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04656065255403519, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04516276344656944, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.035349950194358826, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.030460556969046593, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.025698034092783928, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.0244771596044302, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01774412952363491, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013479795306921005, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012676190584897995, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.012169810011982918, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.011403649114072323, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00931220967322588, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009194063022732735, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007672710344195366, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006093296688050032, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01774412952363491, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01774412952363491, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.24075433611869812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21852748095989227, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2072335183620453, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.18645240366458893, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11257024109363556, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10250230878591537, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13612809777259827, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12388770282268524, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11497638374567032, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09746862947940826, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09341509640216827, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06940001249313354, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.059205830097198486, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.054011084139347076, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05270805209875107, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.034575045108795166, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.027567576617002487, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.026548320427536964, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.024463050067424774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.023592976853251457, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01777459681034088, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017047692090272903, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01578390784561634, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010401521809399128, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01777459681034088, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01777459681034088, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.22319713234901428, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20066432654857635, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1922689527273178, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16360726952552795, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10486003756523132, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09658697992563248, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12068749964237213, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11074481904506683, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10651137679815292, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0861130878329277, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08026138693094254, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06198379024863243, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.053352199494838715, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05076989531517029, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05016567185521126, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03126685321331024, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02698567509651184, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026415985077619553, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023365244269371033, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02297034114599228, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017132308334112167, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0176994651556015, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016315031796693802, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013029155321419239, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017132308334112167, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017132308334112167, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16880063712596893, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15719416737556458, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15243449807167053, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13753721117973328, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08015461266040802, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07554200291633606, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09126991778612137, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08419093489646912, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0811665877699852, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07082301378250122, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06741170585155487, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0466403029859066, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04044396057724953, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.038624171167612076, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0381908118724823, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02335994690656662, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020122110843658447, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01975196599960327, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018253076821565628, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01797609031200409, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012432555668056011, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012631319463253021, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011835341341793537, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008729537017643452, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018253076821565628, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018253076821565628, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2583978474140167, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24123390018939972, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23491322994232178, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2119939923286438, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12280064076185226, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11635104566812515, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13831424713134766, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12755458056926727, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1241612657904625, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10869080573320389, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10336310416460037, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07073415070772171, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06115947291254997, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05899418517947197, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.058477479964494705, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0353747121989727, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.030374176800251007, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.029891259968280792, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027497343719005585, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.027168555185198784, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018685854971408844, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018447939306497574, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017952006310224533, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012177122756838799, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017952006310224533, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017952006310224533, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2403484582901001, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2093004733324051, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19532416760921478, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17435118556022644, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10937201976776123, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0960923284292221, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13419519364833832, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12222417443990707, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11348719149827957, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09172260761260986, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0879359021782875, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06823844462633133, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05856004357337952, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.052698880434036255, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05124392360448837, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03441238030791283, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027670715004205704, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.026937350630760193, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024221207946538925, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023263443261384964, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018648432567715645, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018233055248856544, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01671023666858673, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012551938183605671, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018233055248856544, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018233055248856544, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12048809230327606, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10966697335243225, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09925138205289841, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0897003561258316, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05624627694487572, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04866175353527069, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07639498263597488, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06921978294849396, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05750836804509163, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.049303796142339706, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04824119806289673, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03903742507100105, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.033179137855768204, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.027280667796730995, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025731954723596573, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.019527312368154526, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01450428832322359, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01350976899266243, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013165214098989964, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012183699756860733, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010292344726622105, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.010254887863993645, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008214474655687809, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006983690895140171, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01450428832322359, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01450428832322359, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10751757770776749, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09704293310642242, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08266398310661316, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07500455528497696, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04948337376117706, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03997325897216797, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.07459479570388794, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06730852276086807, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.050933197140693665, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.043576858937740326, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04336320608854294, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03760500252246857, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.032094668596982956, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.024082999676465988, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021801147609949112, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.018868152052164078, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012835816480219364, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011444563046097755, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01170139666646719, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01027580164372921, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00985391903668642, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00962060410529375, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007046193350106478, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006290207151323557, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012835816480219364, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012835816480219364, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.251370906829834, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22358468174934387, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20746494829654694, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.18626902997493744, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11629512161016464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10251816362142563, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14993716776371002, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13398797810077667, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11977528780698776, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09967862069606781, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09641198068857193, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07706240564584732, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06412382423877716, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.056000690907239914, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05391782522201538, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03866428881883621, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.028949115425348282, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027472933754324913, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0255973469465971, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.024241287261247635, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.019915077835321426, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018887897953391075, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016403699293732643, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.012036380358040333, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016403699293732643, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016403699293732643, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23057964444160461, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20259180665016174, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19208002090454102, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16550308465957642, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10682890564203262, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09639320522546768, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12546515464782715, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11453046649694443, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10920465737581253, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08589381724596024, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08157934993505478, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06428361684083939, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.054967813193798065, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.051475394517183304, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05063632130622864, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03217582032084465, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.026982704177498817, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026214003562927246, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02288057841360569, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022319646552205086, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017269471660256386, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017375284805893898, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016122963279485703, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012195592746138573, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017269471660256386, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017269471660256386, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15592968463897705, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14496628940105438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.140168696641922, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12643514573574066, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07395715266466141, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06945285946130753, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08483576774597168, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07826157659292221, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07494870573282242, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0652695894241333, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0621710829436779, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.043376289308071136, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.037576109170913696, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03564054146409035, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03518369793891907, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021722767502069473, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01855238527059555, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01817653328180313, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016814634203910828, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01651982218027115, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011557906866073608, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011682236567139626, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010920094326138496, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008033981546759605, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01817653328180313, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01817653328180313, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2352571040391922, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.219267338514328, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2131141573190689, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19219252467155457, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11176225543022156, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10559777170419693, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.126626655459404, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11673181504011154, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11311860382556915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09877876192331314, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09390097856521606, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06474200636148453, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05600468069314957, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.053768835961818695, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.053235940635204315, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03243694081902504, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027877358719706535, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02739659510552883, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025251492857933044, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024913405999541283, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017230011522769928, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01725548878312111, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016475971788167953, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011720489710569382, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017230011522769928, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017230011522769928, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2209642231464386, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19219259917736053, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17919038236141205, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15889599919319153, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10073163360357285, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08845451474189758, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12294818460941315, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11249624937772751, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10444481670856476, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08397365361452103, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08006132394075394, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06276018917560577, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0540408194065094, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04865449294447899, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04730776324868202, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.031669266521930695, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025727953761816025, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025062870234251022, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02248799428343773, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02161213383078575, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017196813598275185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017179271206259727, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01543651707470417, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01211607176810503, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017196813598275185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017196813598275185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11949926614761353, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10858223587274551, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09903478622436523, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08961588889360428, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05563711002469063, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.048512522131204605, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0757451131939888, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06738625466823578, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0569680780172348, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04878459498286247, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.047960296273231506, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03874850645661354, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03229006752371788, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026941291987895966, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025532249361276627, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.019424108788371086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014202154241502285, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013271557167172432, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012859643436968327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011973205953836441, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010175129398703575, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009841980412602425, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008120001293718815, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006595918908715248, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014202154241502285, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014202154241502285, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.1052827313542366, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09625893831253052, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08575455844402313, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07759062945842743, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04885651171207428, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.041689254343509674, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06889767944812775, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06231103837490082, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.049964774399995804, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.043187279254198074, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04246537387371063, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03508957102894783, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.029722679406404495, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.023668251931667328, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.022016562521457672, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017524104565382004, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01251156721264124, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011475899256765842, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01140973437577486, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0103733716532588, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.009180989116430283, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008941794745624065, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0070227961987257, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005870644003152847, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017524104565382004, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017524104565382004, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2686889171600342, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.24564114212989807, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.23377925157546997, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.2107134461402893, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.12623794376850128, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.11562053114175797, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.15371379256248474, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13882280886173248, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.12893564999103546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.110356904566288, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.10619152337312698, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07867556810379028, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06649687141180038, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.060702383518218994, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.059249669313430786, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03947974741458893, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.031157812103629112, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.030062362551689148, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.027904827147722244, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02694074995815754, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.020308244973421097, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.019460955634713173, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017808808013796806, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01222721952944994, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017808808013796806, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.017808808013796806, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1932489424943924, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16788814961910248, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1588362604379654, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13859596848487854, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08937019854784012, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07890652120113373, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10599662363529205, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09527743607759476, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09190669655799866, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07285080850124359, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06801953166723251, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05439913272857666, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04573268070816994, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04304906725883484, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04239116981625557, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02720080316066742, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02245323546230793, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.021757392212748528, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0191721823066473, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01873685047030449, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014382426626980305, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014283997938036919, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013424755074083805, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009882081300020218, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014382426626980305, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014382426626980305, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1544802188873291, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14341872930526733, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13856224715709686, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12493662536144257, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07323785871267319, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0686815083026886, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08421404659748077, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07761581987142563, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07422929257154465, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06456931680440903, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06151827424764633, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.043079908937215805, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.037330422550439835, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03536442667245865, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.034883879125118256, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021577827632427216, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018546633422374725, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018159804865717888, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016830410808324814, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016531767323613167, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011540901847183704, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011880549602210522, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010884431190788746, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00839000940322876, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018159804865717888, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018159804865717888, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.20806196331977844, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.19358178973197937, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.18772022426128387, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1695743203163147, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.09945221245288849, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09378866106271744, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11284072697162628, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10415983200073242, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10071129351854324, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08793025463819504, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08377090841531754, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05834750831127167, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05095048248767853, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.048851244151592255, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.048348527401685715, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.029430415481328964, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027011873200535774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026606421917676926, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024909034371376038, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024616921320557594, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.0166238471865654, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018915360793471336, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015965867787599564, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.015237211249768734, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.0166238471865654, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.0166238471865654, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22364011406898499, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19624117016792297, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1847859025001526, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16344892978668213, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10230448096990585, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09112346172332764, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12321288138628006, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11214517056941986, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10553698241710663, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08562539517879486, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08111292123794556, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06246266886591911, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.053752023726701736, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04930020496249199, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04820208251476288, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.031368695199489594, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025934142991900444, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025391552597284317, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022697463631629944, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021981189027428627, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01680470071732998, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016980048269033432, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015294886194169521, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011895887553691864, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01680470071732998, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01680470071732998, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11518073081970215, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10424153506755829, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09325391054153442, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0843312069773674, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0535731315612793, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04564628750085831, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07556837797164917, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06721149384975433, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05495797470211983, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04683320224285126, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04622886702418327, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03870473802089691, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.032266199588775635, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026038097217679024, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02436167374253273, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.019448569044470787, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013890288770198822, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012846260331571102, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01259233895689249, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011551180854439735, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010242102667689323, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00998037587851286, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007900170981884003, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006823072675615549, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013890288770198822, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013890288770198822, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10795664042234421, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09742911905050278, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08262968063354492, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07488619536161423, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04955244064331055, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.039875831454992294, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.07519267499446869, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06794943660497665, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.051019277423620224, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.043622300028800964, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04363560676574707, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03822890296578407, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.03233925253152847, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.024151690304279327, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021777318790555, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.019216133281588554, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012878048233687878, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.01143608707934618, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011737196706235409, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010257280431687832, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.010034398175776005, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009700126014649868, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0070806220173835754, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006330322939902544, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012878048233687878, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012878048233687878, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.24854084849357605, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21995548903942108, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2025585174560547, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1823713332414627, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11463721841573715, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09999166429042816, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.150244802236557, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.13393746316432953, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11837054044008255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09802152961492538, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0954556092619896, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07737887650728226, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06421186774969101, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05531147122383118, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.052982885390520096, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.038973044604063034, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.028708556666970253, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027058672159910202, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025390425696969032, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02387579157948494, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.020153231918811798, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.019044576212763786, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016244875267148018, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.012256325222551823, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016244875267148018, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016244875267148018, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19156894087791443, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16987469792366028, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.16260741651058197, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13933388888835907, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0889778807759285, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08125071972608566, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10268554091453552, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09351903945207596, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09060068428516388, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07236500829458237, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06838151067495346, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05275917798280716, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04511325806379318, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.043104954063892365, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04261723533272743, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.026481134817004204, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.022935597226023674, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.022440191358327866, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01975182630121708, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.019443849101662636, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014395691454410553, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01507600024342537, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013745504431426525, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0111936554312706, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014395691454410553, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014395691454410553, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1583229899406433, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14728927612304688, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14251603186130524, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12865044176578522, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0756169855594635, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0711156353354454, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08654245734214783, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07977321743965149, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07656318694353104, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0668594166636467, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06378260254859924, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04478241130709648, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03901844099164009, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.037169378250837326, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03672947362065315, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022616179659962654, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020557040348649025, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020207464694976807, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018947653472423553, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01868758164346218, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012785414233803749, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014430934563279152, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012200349010527134, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011587608605623245, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012785414233803749, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012785414233803749, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1656246781349182, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1541590839624405, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1495552659034729, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13497668504714966, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07907593995332718, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07458971440792084, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08997929841279984, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08286496996879578, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08005882799625397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06995157897472382, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06666713207960129, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0464681051671505, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04040389880537987, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03871488571166992, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.038314640522003174, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023422691971063614, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.021219471469521523, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020889967679977417, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01952073909342289, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.019289826974272728, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013171600177884102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014655396342277527, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01263201143592596, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011620011180639267, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013171600177884102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013171600177884102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.1876383274793625, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.16546109318733215, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.15603630244731903, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.13684441149234772, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.08644254505634308, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.07744134962558746, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1053805872797966, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.09392102062702179, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.08869194239377975, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07202469557523727, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.06844178587198257, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05326269939541817, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04565003886818886, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04230642318725586, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.041487157344818115, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.027202511206269264, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.023292461410164833, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.022888313978910446, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02066667377948761, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02015821449458599, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01560792326927185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01640482246875763, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014552230015397072, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012884093448519707, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01560792326927185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01560792326927185, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11156018078327179, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10081395506858826, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09048338234424591, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08159959316253662, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05177854001522064, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.044281966984272, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07176587730646133, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06463776528835297, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05315592512488365, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.045209433883428574, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04442994296550751, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03666556626558304, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030985943973064423, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025149352848529816, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.023584933951497078, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018365848809480667, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013403422199189663, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012412111274898052, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012137101963162422, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011157974600791931, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009680547751486301, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009584618732333183, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0075823841616511345, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0065415468998253345, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018365848809480667, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018365848809480667, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10529080033302307, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09484876692295074, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08138281106948853, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.073659248650074, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0484447255730629, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03941136598587036, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.07310476154088974, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06522752344608307, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0498739518225193, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04249676316976547, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04236754775047302, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.037118855863809586, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.031136982142925262, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02361641637980938, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021465737372636795, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.018589982762932777, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012621773406863213, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011317052878439426, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01147953886538744, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010154313407838345, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.009757758118212223, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009435700252652168, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00705089932307601, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006276879925280809, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012621773406863213, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012621773406863213, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2516089677810669, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2200901210308075, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20073412358760834, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1805940717458725, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11573242396116257, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0993209183216095, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.15225964784622192, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1367444545030594, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11994411796331406, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0978834331035614, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09569457918405533, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07832109183073044, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06543993949890137, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.055772870779037476, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.053267430514097214, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03935952112078667, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.028843147680163383, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027062414214015007, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025273850187659264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.023613780736923218, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.020273521542549133, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.019137270748615265, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016313156113028526, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.012072809971868992, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016313156113028526, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016313156113028526, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1103939488530159, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09883817285299301, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09484732151031494, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08258330821990967, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04980818182229996, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04569792002439499, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.058720991015434265, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0520133450627327, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05050492286682129, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04076460748910904, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03868715465068817, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02957010827958584, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02657712623476982, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025670407339930534, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02541559562087059, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015555123798549175, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.015720831230282784, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.015509010292589664, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.014295680448412895, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.014160879887640476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009630578570067883, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.012463739141821861, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.009378742426633835, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011146370321512222, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015555123798549175, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015555123798549175, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15552127361297607, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14525206387043, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1411547213792801, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1272391676902771, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0738159716129303, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06979075074195862, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08373933285474777, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07716528326272964, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07461247593164444, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0652930811047554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06211644411087036, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04277072474360466, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03701063245534897, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03551143407821655, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.035147231072187424, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021392719820141792, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018393203616142273, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01807243749499321, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016670694574713707, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016445549204945564, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011283849366009235, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01138531044125557, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010770559310913086, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007728891912847757, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018393203616142273, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018393203616142273, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.10417035967111588, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.09703023731708527, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.0942043736577034, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.08491005748510361, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.04936779662966728, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.046612005680799484, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.056124310940504074, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.051617737859487534, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.04992597550153732, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.04358482360839844, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.04151025414466858, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.028741685673594475, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.024851633235812187, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.023845411837100983, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.023605983704328537, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01443335972726345, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.012576492503285408, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.012366998009383678, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.011448112316429615, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.011300861835479736, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.007840949110686779, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.008098606020212173, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.007512833923101425, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.005863463040441275, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01443335972726345, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01443335972726345, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.10523741692304611, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.09232056885957718, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.08629787713289261, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.07575786858797073, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.04860834404826164, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.04309821128845215, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.06094573438167572, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.05425964295864105, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.05013483390212059, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.040621962398290634, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.03892214596271515, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.031496260315179825, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.02660190872848034, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.0240317452698946, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.023389894515275955, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.016288258135318756, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.013588851317763329, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.01328612957149744, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.012190405279397964, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.011811086907982826, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.009642013348639011, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.010021730326116085, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.008844497613608837, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.008132439106702805, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.016288258135318756, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.016288258135318756, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } } ], "last_module_idx": 66, "base_perplexity": 7.183070353142565 }