{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.011516222730278969, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.009946178644895554, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.004965895786881447, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0052599008195102215, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.005259649362415075, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0021298094652593136, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.011325421743094921, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.00986222643405199, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.005442788824439049, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.004817623179405928, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.005074572283774614, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.005304637830704451, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.004816149361431599, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0028677613008767366, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0021963142789900303, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0028311111964285374, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.0019714462105184793, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.0016631220933049917, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.001915354048833251, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0016123336972668767, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0018914982210844755, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0019152200547978282, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0015196186723187566, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0015911641530692577, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.011574050411581993, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.009997705928981304, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.004892703145742416, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.005148574709892273, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.005148190073668957, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.001971515128389001, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.011482193134725094, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.009886334650218487, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.005329708568751812, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.004676963202655315, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.004902842454612255, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.00511775491759181, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0046748751774430275, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.002718802075833082, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.001981121487915516, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.002684823703020811, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.001717100152745843, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0013755910331383348, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0016500966157764196, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0013096998445689678, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.001656644162721932, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.001649727113544941, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0012406300520524383, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0012832244392484426, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11732777208089828, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07157032936811447, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04415346682071686, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.05084093660116196, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.05079258233308792, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.024459196254611015, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.07963594794273376, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06560610979795456, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05534786731004715, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.031897082924842834, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03823007270693779, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04328346252441406, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.031559526920318604, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02464585192501545, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02275022305548191, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.022115003317594528, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01301188487559557, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011187727563083172, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009464791044592857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.007918260991573334, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.011396360583603382, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009405961260199547, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00733035197481513, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006110208109021187, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10399339348077774, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06632126867771149, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.04192286729812622, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.04611274227499962, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04545317217707634, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.023248333483934402, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06721125543117523, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060207705944776535, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04956688731908798, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02959953434765339, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03199730068445206, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03451173007488251, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029141828417778015, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02265508659183979, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.020862771198153496, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017426999285817146, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012995314784348011, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01172957755625248, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01041831448674202, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.009258182719349861, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009689065627753735, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.010354920290410519, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007659011986106634, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.008117607794702053, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.11869213730096817, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.11097919940948486, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.10855196416378021, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.09940053522586823, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.05366049334406853, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.05147261917591095, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.05970853939652443, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.055096350610256195, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.054108329117298126, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.04881494492292404, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.04711800068616867, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.030352797359228134, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.026426218450069427, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.025826197117567062, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.025688843801617622, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.015255269594490528, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.013902914710342884, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01377208810299635, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.013021668419241905, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.012945073656737804, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.00852638203650713, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.009259208105504513, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.008343085646629333, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007164906710386276, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.13681308925151825, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1293710321187973, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12705379724502563, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11643776297569275, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06215749308466911, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06009490787982941, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.06860331445932388, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06342732161283493, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0626005232334137, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05692502483725548, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0547904334962368, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03457416594028473, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.030045578256249428, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.029505550861358643, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.029378648847341537, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01723599061369896, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.015090849250555038, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.014960295520722866, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.014068908989429474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.013990845531225204, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.009001931175589561, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.008960755541920662, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.00882382970303297, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00584032479673624, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.11404669284820557, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.09617435932159424, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.08819855004549026, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.0782637670636177, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.050196196883916855, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.04319199174642563, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.06409215927124023, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.05698951706290245, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.05249408632516861, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.041234780102968216, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0396931990981102, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.03219200670719147, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.027272174134850502, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.02431836538016796, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.023578325286507607, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.01627426967024803, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.013185347430408001, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.012837459333240986, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.011542177759110928, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.011076274327933788, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0090929651632905, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.00923425517976284, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.008110894821584225, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.006940085906535387, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.022985871881246567, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.015533645637333393, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.008960109204053879, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.009738768450915813, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.009428001940250397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.004300868138670921, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.016113126650452614, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.014565050601959229, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.01076146773993969, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.007119026035070419, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.007626470644026995, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.008155018091201782, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.006952361203730106, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.004709904547780752, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.004017071798443794, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.004089743830263615, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.0027088425122201443, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.0022661236580461264, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.002325971843674779, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0018923678435385227, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0022181286476552486, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0023007348645478487, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0014965500449761748, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0016889441758394241, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.019579103216528893, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.013518130406737328, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.007443813607096672, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.008128460496664047, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.007903434336185455, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0033458895049989223, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.014497848227620125, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.01285391766577959, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.009044636972248554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.006199433468282223, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.006732793990522623, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.007227729540318251, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.006086958572268486, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.003925321623682976, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.0032182505819946527, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.003622685791924596, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0022162015084177256, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.001792362192645669, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.001924609998241067, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0014928753953427076, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.001930709695443511, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.001906831399537623, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0012053060345351696, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0013327336637303233, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.1357295662164688, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09043239057064056, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06581448763608932, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06386996805667877, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.058229442685842514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03502599895000458, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.08249500393867493, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.07480981200933456, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.06438077986240387, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03917797654867172, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.040142644196748734, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.042006202042102814, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.03566913679242134, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02821158617734909, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.026202701032161713, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.020961444824934006, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.014810767024755478, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012985633686184883, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011179756373167038, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00958437379449606, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.010885275900363922, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.010548258200287819, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.008227306418120861, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006760433781892061, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1648389995098114, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.13318152725696564, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.11907266080379486, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0980859100818634, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.07632317394018173, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.06288811564445496, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09773320704698563, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08644896000623703, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.07861161977052689, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.05496462807059288, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0531233586370945, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.050376638770103455, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04224948585033417, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.03774523362517357, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.03661687672138214, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.025550810620188713, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.021194076165556908, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.020208463072776794, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01749793440103531, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.016754640266299248, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014359441585838795, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015582644380629063, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.012900064699351788, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012477613054215908, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1613745093345642, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15272773802280426, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14999040961265564, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13662567734718323, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07474704086780548, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07211809605360031, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08282493054866791, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07611904293298721, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07521943002939224, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06791628897190094, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06539437174797058, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04234210029244423, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.036866385489702225, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03626294806599617, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03611881285905838, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021391509100794792, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01978401094675064, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.0196403656154871, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01855134405195713, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018473545089364052, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012101074680685997, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013441026210784912, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011937005445361137, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010706454515457153, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1935919225215912, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1836695671081543, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.18063479661941528, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.16484206914901733, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08996592462062836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08693700283765793, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09908465296030045, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09141393005847931, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09049604833126068, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08187415450811386, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07871124148368835, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.050526756793260574, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.043922003358602524, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04330366477370262, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04315107315778732, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.025301555171608925, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02305048331618309, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.022862179204821587, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.0215300302952528, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.021440889686346054, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013798260129988194, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014940764755010605, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.013604938983917236, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011256342753767967, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.012854441069066525, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.011553662829101086, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.008966071531176567, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.008119038306176662, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.006453778129070997, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.004246675409376621, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.00985503289848566, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.009430688805878162, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.006632837932556868, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.005776112899184227, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0054773250594735146, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.00519787659868598, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0050856624729931355, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.00327725475654006, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.002646134700626135, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0027976937126368284, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.0018816891824826598, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.0015279045328497887, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0017647637287154794, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.0013757090782746673, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0016330383950844407, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.0016502703074365854, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.0010511275613680482, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0011228960938751698, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.05763109028339386, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.04541769623756409, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.03854035586118698, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.03477160632610321, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.025470050051808357, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.019441764801740646, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.03471656143665314, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03165319561958313, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.027145300060510635, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.019710101187229156, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.019402040168642998, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.01758238486945629, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.015089605003595352, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.012329572811722755, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.011597909964621067, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.008789267390966415, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.0065160952508449554, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.005963589996099472, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.005406054202467203, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.00490233302116394, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00458145048469305, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004584441427141428, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.003626123070716858, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003064129501581192, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.05868940427899361, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.044318098574876785, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0354095883667469, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.03267555311322212, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.025374986231327057, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.017782039940357208, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03634483367204666, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.033143412321805954, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.02747989259660244, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.01922295242547989, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.01928291656076908, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01839948631823063, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01572592556476593, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01234314776957035, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.011414408683776855, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009185426868498325, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.006590278819203377, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.005896498449146748, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0054289414547383785, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.004774797707796097, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004805720876902342, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004821693059056997, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00367914792150259, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0032500424422323704, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.1656222939491272, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.13813373446464539, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1268152892589569, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.11213748157024384, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0757303237915039, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.06403155624866486, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09291236102581024, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.08434747904539108, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.07868853956460953, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.059782952070236206, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.05708823353052139, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04722483456134796, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04036635905504227, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.03639819845557213, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.03541715815663338, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02362363412976265, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.018739880993962288, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.017820697277784348, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.015530663542449474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.014831198379397392, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01234652940183878, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0118783637881279, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.010916163213551044, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007549621630460024, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.17106182873249054, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.14986640214920044, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.13942968845367432, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.11644385755062103, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.07993924617767334, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07029940187931061, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09935677796602249, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08952851593494415, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08194415271282196, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06428451091051102, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.059277359396219254, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05176845192909241, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.043801601976156235, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.039426062256097794, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.03836002200841904, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02636435627937317, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.021952249109745026, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.021166019141674042, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.019139491021633148, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.018471717834472656, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015092534944415092, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015892520546913147, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013729715719819069, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012596875429153442, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2051875740289688, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1931820660829544, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.18948844075202942, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1715765744447708, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.09624194353818893, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09233679622411728, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10663057118654251, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09821543097496033, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09698311239480972, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08626755326986313, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08229011297225952, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05446993559598923, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04711304232478142, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04623771458864212, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.046026717871427536, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.027268456295132637, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.024195155128836632, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02395487390458584, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.022241441532969475, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.022120194509625435, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014713367447257042, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01514931209385395, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.014437643811106682, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010840115137398243, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23358529806137085, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2203090786933899, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.216195210814476, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19590142369270325, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10955247282981873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10517656803131104, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12100902199745178, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11167486011981964, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1103789284825325, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09832125157117844, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09369277954101562, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06158391013741493, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.053341832011938095, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05240199342370033, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.052183885127305984, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03076615184545517, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.026935068890452385, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02666916511952877, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02467011660337448, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02453584596514702, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016235992312431335, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016160449013113976, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01592964492738247, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01074717752635479, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.18422500789165497, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.16113892197608948, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.15170170366764069, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.13377343118190765, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.08312727510929108, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.07425428926944733, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.09974499046802521, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.09087441116571426, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.0858316421508789, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0696481317281723, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.06600460410118103, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05068475008010864, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04349130392074585, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04004056006669998, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.03918663412332535, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.025462649762630463, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.021202167496085167, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02079150453209877, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.018627630546689034, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.018088210374116898, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.013832705095410347, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.014051337726414204, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.012702247127890587, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010110653936862946, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.045213744044303894, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.03790950030088425, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.03370540589094162, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.02988031506538391, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.020488755777478218, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.016931939870119095, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.02700980193912983, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.024582428857684135, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.021395158022642136, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.01650070585310459, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.01596941612660885, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.01369860116392374, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.011736154556274414, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.009919061325490475, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.00944592896848917, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0068673864006996155, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.005253002978861332, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.004907434806227684, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.004483112599700689, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.004167563281953335, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0036307605914771557, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0036377019714564085, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0030038906261324883, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0024905125610530376, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.04452397674322128, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.036301691085100174, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.03105509839951992, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.027624700218439102, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.019797254353761673, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.015504848212003708, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.027466993778944016, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.024982603266835213, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.02089797891676426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.01574130170047283, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.015488767065107822, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.013932795263826847, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.011901630088686943, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.009566656313836575, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.008938800543546677, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.006967813242226839, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.00502180028706789, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.004569636657834053, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.004229472018778324, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.003793110139667988, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.003610586281865835, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.003533183131366968, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0027836100198328495, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0022755181416869164, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.17352744936943054, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.14745894074440002, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.13691744208335876, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.12049509584903717, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.07948918640613556, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0688314437866211, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09588594734668732, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.08794748038053513, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08225289732217789, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.06388277560472488, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06072583049535751, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04869468882679939, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04198920354247093, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.03812910616397858, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.03717290237545967, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.024299079552292824, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.019514694809913635, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.018657997250556946, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01633497141301632, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.015670299530029297, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012496666051447392, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012156720273196697, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.011118105612695217, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007523140404373407, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.18781188130378723, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1619080901145935, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15192966163158417, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1263023316860199, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08762861788272858, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07789754867553711, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10399840027093887, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0944221019744873, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08947784453630447, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06897604465484619, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06204807013273239, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.053655315190553665, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04586057364940643, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.042797308415174484, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0420653373003006, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02708117850124836, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.023290330544114113, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.022623863071203232, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.019841039553284645, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.019371243193745613, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015068729408085346, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01608879491686821, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014103496447205544, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012460839003324509, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.20917968451976776, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.19706489145755768, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.19326843321323395, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.17539873719215393, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.09842419624328613, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09448869526386261, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10893803834915161, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10044767707586288, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09919589757919312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08836084604263306, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08451863378286362, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.055629804730415344, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.048143938183784485, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.047247741371393204, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04704895615577698, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02784823812544346, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.024590734392404556, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.024343157187104225, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02259710617363453, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.022464441135525703, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014932766556739807, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015207027085125446, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.014653690159320831, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010649232193827629, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24216097593307495, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22832821309566498, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22413142025470734, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20365126430988312, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11403566598892212, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10953227430582047, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12625916302204132, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11626692116260529, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11494041979312897, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10248813778162003, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09810633212327957, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06443319469690323, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05563017353415489, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05464465543627739, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.054412923753261566, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03222062066197395, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02818608097732067, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02791517972946167, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02587144263088703, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025723915547132492, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01720409281551838, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017055155709385872, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0168905109167099, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01151361782103777, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21135751903057098, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18584416806697845, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17611151933670044, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15606649219989777, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09589751809835434, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08633609116077423, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1136905699968338, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10363403707742691, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09886156022548676, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08078362047672272, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07663840055465698, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05778822675347328, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04960356652736664, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.0461292639374733, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04527219012379646, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029070692136883736, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024255352094769478, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.023854976519942284, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021334197372198105, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.020792614668607712, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.015773814171552658, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015787390992045403, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014637592248618603, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011174800805747509, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.05948769673705101, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.050449322909116745, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.04513690248131752, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.04027535021305084, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.027075229212641716, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.022577885538339615, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.03574075177311897, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03234461694955826, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.028201302513480186, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.022117676213383675, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.02146393433213234, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.018234677612781525, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.015499673783779144, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.013137688860297203, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.012527276761829853, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.009146621450781822, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.007011245470494032, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.006571286357939243, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.00606562290340662, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.00567082641646266, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.004880643915385008, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004906835965812206, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0040611401200294495, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003452627919614315, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.056324515491724014, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.04648752883076668, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04054190218448639, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.03629001975059509, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.025136342272162437, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.020173847675323486, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03449635207653046, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.031042151153087616, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.026542099192738533, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0203094482421875, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.01994762197136879, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01755637302994728, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.014857339672744274, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.012183374725282192, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.011472580023109913, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.008815824054181576, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.006461561657488346, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0059673781506717205, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.005535739939659834, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0050581227988004684, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004637617152184248, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0045643760822713375, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0036601945757865906, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003088844008743763, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.1864609271287918, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16074705123901367, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.15060710906982422, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1331884264945984, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08568394184112549, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07543589919805527, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10199170559644699, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09378483146429062, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08845929801464081, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.06995773315429688, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06641410291194916, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05186784267425537, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04478476569056511, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04114434868097305, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04024120792746544, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.025899261236190796, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.021115846931934357, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02030659280717373, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01790967397391796, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.017299752682447433, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013402873650193214, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013119560666382313, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.012099076993763447, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008295702748000622, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.17586787045001984, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1543768048286438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1462813764810562, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.12169602513313293, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0819489061832428, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07384102046489716, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09773257374763489, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0879402831196785, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08373666554689407, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06597929447889328, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.05989423766732216, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05036632716655731, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04285843297839165, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04016478732228279, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.03951609134674072, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.025480611249804497, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.022012703120708466, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02144755981862545, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.019095050171017647, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0186851117759943, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014243164099752903, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015336965210735798, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013399915769696236, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012049169279634953, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.19034259021282196, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1784335970878601, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17444641888141632, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.158107727766037, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08962921798229218, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08551925420761108, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10020657628774643, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09218306094408035, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09052243083715439, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07997653633356094, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07646854221820831, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05114017799496651, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04419099539518356, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04306294769048691, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04279589653015137, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02564249001443386, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.022395310923457146, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.022122297435998917, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020471878349781036, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.020307110622525215, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013777690939605236, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013859505765140057, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.013406135141849518, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009627648629248142, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24311454594135284, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22849194705486298, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22382470965385437, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2030647099018097, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.1146879568696022, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10969648510217667, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12768064439296722, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11745291948318481, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11571551859378815, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1026005670428276, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09813804179430008, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06505951285362244, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05622314661741257, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05498332530260086, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05469673126935959, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032627884298563004, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028316354379057884, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027997801080346107, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025875339284539223, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.0256913173943758, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01740949973464012, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017097560688853264, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016998471692204475, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011408226564526558, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21041303873062134, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18631714582443237, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1769125759601593, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1565249264240265, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09576485306024551, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0866631343960762, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.113956019282341, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10346066951751709, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09857257455587387, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08121240884065628, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07695876061916351, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05800103396177292, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04963032901287079, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04615579545497894, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.0453173890709877, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029206397011876106, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024425696581602097, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02402133122086525, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021637409925460815, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021097857505083084, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01590673252940178, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016080055385828018, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.0147521011531353, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0115955900400877, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.07236135751008987, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06315521150827408, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05803793668746948, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.05173764377832413, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.03318897634744644, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.02884329855442047, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04215850681066513, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03839006647467613, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.034264739602804184, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02775675803422928, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.02671089582145214, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.021391015499830246, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.018355265259742737, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.016046473756432533, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.015453572385013103, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0107222655788064, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.008459808304905891, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.008027391508221626, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007405756041407585, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.007020246237516403, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00567433750256896, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005718395113945007, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.004864959046244621, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003934662789106369, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.06461703777313232, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05543337017297745, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.049866244196891785, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.044413961470127106, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.02931654267013073, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.024717751890420914, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.038313303142786026, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03503994643688202, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.030468318611383438, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.024218706414103508, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.023476410657167435, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.019422387704253197, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.016697518527507782, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.014107789844274521, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01343753095716238, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009702267125248909, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.007321490440517664, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.006839755456894636, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006314597092568874, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0058601014316082, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005021780729293823, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004907931201159954, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004122762940824032, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0031249241437762976, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20151068270206451, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17924322187900543, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17102156579494476, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15171772241592407, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09358051419258118, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0850522592663765, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10840142518281937, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09976081550121307, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09568403661251068, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07859206944704056, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07424037158489227, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05499289929866791, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.047586988657712936, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04478149116039276, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04410597309470177, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027415672317147255, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0227196104824543, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.022066453471779823, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019618500024080276, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.019146867096424103, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014073474332690239, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013573908247053623, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013071809895336628, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008176836185157299, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2004038691520691, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18068993091583252, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17349417507648468, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.15114764869213104, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09388380497694016, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0864618644118309, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.108067587018013, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09892522543668747, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0953962579369545, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07831642776727676, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07376255095005035, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.055630944669246674, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.047679781913757324, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04533759877085686, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.044771384447813034, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.028007742017507553, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02393953874707222, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02339540421962738, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02091994881629944, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02056068554520607, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015222006477415562, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015513353049755096, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014431549236178398, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011229204013943672, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.18078558146953583, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1696293205022812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16589108109474182, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.15035003423690796, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08511320501565933, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0812121257185936, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09497135132551193, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08755435049533844, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08594399690628052, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07599911838769913, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07247215509414673, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04832954332232475, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04187145456671715, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.040792979300022125, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04053071141242981, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024171683937311172, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02098267711699009, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020713476464152336, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019138725474476814, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01897640898823738, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01274914387613535, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012662136927247047, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012384223751723766, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008393879979848862, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23939631879329681, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22523951530456543, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22064125537872314, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2003784328699112, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11295708268880844, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1080877035856247, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12533104419708252, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1157085970044136, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11394262313842773, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1011967808008194, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09658275544643402, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06378752738237381, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05528145283460617, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05404865741729736, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.053753264248371124, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03186751902103424, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027631070464849472, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027315398678183556, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02522645890712738, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02503730170428753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016656544059515, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016384560614824295, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016244826838374138, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010539768263697624, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.20581483840942383, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18388277292251587, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17465272545814514, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15537358820438385, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09428699314594269, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08550043404102325, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11276700347661972, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1023273691534996, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09682104736566544, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0805785283446312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0766555592417717, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05748609080910683, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04906104877591133, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04541435092687607, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04453031346201897, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.028943220153450966, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.023952534422278404, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02350231632590294, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021305952221155167, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02073649875819683, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.015763582661747932, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015703322365880013, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014522437937557697, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011162620037794113, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.06639862805604935, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.05760689079761505, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05191978067159653, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.04627435654401779, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.030353626236319542, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.025768382474780083, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.040231041610240936, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03642791137099266, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.031447771936655045, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02532367780804634, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.024590522050857544, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.020433951169252396, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.017416302114725113, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.014696442522108555, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.013996869325637817, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010232606902718544, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.007793115451931953, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.007303227204829454, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.006819679401814938, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.006361558102071285, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005382116883993149, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005410735495388508, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.004410790279507637, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003715250175446272, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.06000518426299095, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.051148444414138794, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04506251960992813, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.04016738384962082, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.027087368071079254, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.022295918315649033, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03690667450428009, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03356446325778961, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.028269462287425995, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02234264835715294, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.021791350096464157, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.018641319125890732, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.015995515510439873, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.013085376471281052, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.012312477454543114, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009340593591332436, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.006864798720926046, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.006328456103801727, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.005937520414590836, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.005418872926384211, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004865341819822788, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004773216787725687, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.003845800179988146, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0030971537344157696, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.1804916113615036, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16049647331237793, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.15215016901493073, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.13517917692661285, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08380237966775894, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07566630840301514, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09904612600803375, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09102582186460495, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08582223206758499, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07053852826356888, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06691869348287582, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05027566850185394, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.043426938354969025, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.040152888745069504, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.03934833034873009, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.025085357949137688, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.020481042563915253, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.019797107204794884, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.017762359231710434, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01721985638141632, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012890264391899109, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012514728121459484, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01170562207698822, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007678671274334192, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19129976630210876, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16679567098617554, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15656283497810364, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13540247082710266, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08854121714830399, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07858505845069885, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10750457644462585, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09735912829637527, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09139508754014969, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07227945327758789, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06807883828878403, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.055379368364810944, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.047058604657649994, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.043044865131378174, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04206044226884842, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.027841603383421898, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.023043615743517876, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02224680781364441, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.019945785403251648, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.019301317632198334, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015060629695653915, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015636324882507324, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01372495200484991, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01152835413813591, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1798301786184311, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1690727323293686, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16537676751613617, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1503077894449234, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08475670218467712, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0809599980711937, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09474700689315796, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08730907738208771, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08556672185659409, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07594679296016693, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07262811809778214, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04825755953788757, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04177100211381912, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.040616802871227264, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0403544120490551, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02415287122130394, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020959345623850822, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020692382007837296, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01917826384305954, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01901034452021122, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012795239686965942, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012742841616272926, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01240907609462738, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008548827841877937, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23449406027793884, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22101305425167084, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21657732129096985, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19694827497005463, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11068235337734222, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10601261258125305, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12312863767147064, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11356448382139206, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11167491972446442, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09939082711935043, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09509521722793579, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06266967952251434, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05428239703178406, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.052986081689596176, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05268685147166252, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03133276477456093, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027143457904458046, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026828067377209663, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024841183796525, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02465067431330681, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016426270827651024, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016189957037568092, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015988366678357124, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010515069589018822, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21635249257087708, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19403338432312012, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18524129688739777, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16431106626987457, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09952178597450256, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09093105792999268, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11802167445421219, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10679667443037033, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10202741622924805, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08516291528940201, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08091641217470169, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0601554811000824, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05129369720816612, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04798687994480133, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.047188930213451385, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030266208574175835, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02539948932826519, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.024999799206852913, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022639289498329163, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.022127117961645126, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01646716520190239, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01667308621108532, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015367879532277584, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012053422629833221, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.07393555343151093, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06595003604888916, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05969547480344772, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.053196825087070465, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.03407681733369827, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.029368622228503227, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.045882903039455414, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04134572297334671, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.034975241869688034, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02914639189839363, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.02828250825405121, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.023360954597592354, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.019803527742624283, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.016534743830561638, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0156718622893095, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.011699032038450241, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.008805831894278526, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.008253015577793121, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007862864062190056, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.007325285580009222, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006179823540151119, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0062055098824203014, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.004990664776414633, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0043197935447096825, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.06280536204576492, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05537823215126991, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04886801540851593, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.04341251030564308, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.028597626835107803, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.023872407153248787, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03980758413672447, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0361664704978466, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.029511505737900734, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.024288883432745934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.02373751625418663, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.020159171894192696, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01728203520178795, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.013841038569808006, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.012921564280986786, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.010116592980921268, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.007289828732609749, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.006687819492071867, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0064498004503548145, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0058481390587985516, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0052348654717206955, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005171189084649086, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004035523161292076, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0033796592615544796, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.191977396607399, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17330452799797058, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16588912904262543, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.14727847278118134, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08956167101860046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0822678655385971, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10455400496721268, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09550881385803223, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09122786670923233, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07620549201965332, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.072130486369133, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05315779149532318, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.045640505850315094, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04293418303132057, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04228692501783371, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02652466483414173, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02191915363073349, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.021321121603250504, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019170448184013367, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01873803697526455, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013657725416123867, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013275200501084328, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.012597649358212948, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00827704556286335, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19778671860694885, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1768297404050827, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.16766446828842163, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14500972628593445, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09285681694746017, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08392643183469772, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11235759407281876, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10076496005058289, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09475373476743698, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07703471928834915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07277026027441025, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.058011624962091446, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.049021825194358826, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.045422300696372986, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04452604055404663, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029244692996144295, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.024671487510204315, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023961948230862617, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021716943010687828, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021175766363739967, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016218192875385284, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017042415216565132, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015076104551553726, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013102463446557522, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1756584346294403, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16542978584766388, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16202494502067566, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14739136397838593, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08297005295753479, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0793672427535057, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09248736500740051, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08515404164791107, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08371254801750183, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07444620877504349, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07136071473360062, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.047288283705711365, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.040833521634340286, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.039850831031799316, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03961740434169769, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02366841770708561, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02068966068327427, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020449494943022728, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01898765191435814, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018841316923499107, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012688461691141129, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01275564543902874, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01236643921583891, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008817681111395359, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23623013496398926, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22287684679031372, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21855950355529785, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19894294440746307, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11176953464746475, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10717296600341797, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12438064068555832, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11433178931474686, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1127203106880188, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1004600077867508, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09629865735769272, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06359735876321793, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05479476973414421, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.053641438484191895, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05337834730744362, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03185127303004265, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02772890031337738, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027428213506937027, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025442540645599365, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025272361934185028, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017080705612897873, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016878638416528702, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016704127192497253, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011470142751932144, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2103821039199829, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1878090351819992, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17874133586883545, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15936817228794098, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.0964510515332222, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08765123039484024, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1148301362991333, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10421310365200043, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09917178004980087, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08250260353088379, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07880686223506927, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05867565795779228, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04997752234339714, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04649798572063446, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.045643217861652374, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029489757493138313, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024539437144994736, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.024130944162607193, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021838227286934853, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021291906014084816, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01601390913128853, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016061212867498398, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01484858337789774, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011477946303784847, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.07352746278047562, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06517719477415085, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.060344476252794266, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.05329336225986481, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.033884163945913315, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.029841454699635506, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04258393123745918, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.038877613842487335, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.03475981578230858, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.028468646109104156, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.027228621765971184, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02158455178141594, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.018558844923973083, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.01633261889219284, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.01576986163854599, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010807310231029987, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.008551263250410557, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.008137972094118595, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007488899864256382, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.007120666094124317, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00567875849083066, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0056799850426614285, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.004898360464721918, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0038359479513019323, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.06337204575538635, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05501655861735344, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05006973817944527, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.04407221078872681, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.028830943629145622, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.02467045560479164, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03715752065181732, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03386855497956276, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.02977123111486435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.023843996226787567, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.022866740822792053, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.018837682902812958, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.016194360330700874, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.013866703025996685, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.013264281675219536, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009425119496881962, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.007205488160252571, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.006775290705263615, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0062204208225011826, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.005822636187076569, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004913661628961563, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00480815302580595, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004098817706108093, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0031067749951034784, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.18049679696559906, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16091220080852509, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1531624048948288, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.13502898812294006, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08367263525724411, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07603777199983597, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09858789294958115, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.08988863229751587, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08552204817533493, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07027005404233932, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0662628784775734, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.050199251621961594, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.042946942150592804, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04010771960020065, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.03940523788332939, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.025091296061873436, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.020591294392943382, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.01997893676161766, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01785493828356266, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01739254593849182, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012966553680598736, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012688052840530872, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.011849991045892239, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008102157153189182, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.20236709713935852, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18225260078907013, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17415092885494232, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1521812528371811, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0950075313448906, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08695606887340546, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11174219846725464, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10148343443870544, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09672766923904419, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07982631772756577, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07495465129613876, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.057467326521873474, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04885004833340645, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.045944198966026306, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04522498697042465, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0287429578602314, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.024121075868606567, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023490097373723984, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02111666649580002, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.020654551684856415, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015281891450285912, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01555674523115158, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014331945218145847, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011005994863808155, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17396506667137146, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16399206221103668, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16056784987449646, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14603416621685028, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08217542618513107, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07861428707838058, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0917171910405159, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08437852561473846, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08289529383182526, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07372723519802094, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07061655074357986, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.046813856810331345, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04042763635516167, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03942778706550598, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.039194848388433456, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023410551249980927, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020422615110874176, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02018536441028118, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018735209479928017, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.0185864195227623, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01244411338120699, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012528929859399796, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01210849080234766, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008575281128287315, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23326173424720764, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22005100548267365, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21574337780475616, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19630616903305054, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11039112508296967, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1057787612080574, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1227862760424614, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1130119264125824, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.111356221139431, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09913267940282822, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09486863762140274, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06264302134513855, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05410381034016609, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.0529228039085865, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05265070125460625, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03132603317499161, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02723010815680027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026927350088953972, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024943603202700615, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024766087532043457, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01653227210044861, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016404379159212112, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01613404043018818, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010909137316048145, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2196597009897232, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19697606563568115, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18813157081604004, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16692087054252625, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10102097690105438, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09230642765760422, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11939584463834763, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10825198888778687, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10361134260892868, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0863831415772438, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08193749189376831, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.060984108597040176, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05202121287584305, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04875249043107033, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04795484244823456, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03082771599292755, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025842761620879173, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025454817339777946, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023041732609272003, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02253827638924122, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017128195613622665, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017014000564813614, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016073672100901604, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012369003146886826, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.09229954332113266, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08208223432302475, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07638151943683624, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.06755953282117844, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04253556951880455, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.037734292447566986, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05326012894511223, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.048366136848926544, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.043585821986198425, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03592416271567345, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03437962383031845, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.027098868042230606, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.023190872743725777, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02058984711766243, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.019929740577936172, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013602971099317074, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.010931751690804958, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.010459022596478462, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.009659807197749615, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.00924341194331646, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0072539374232292175, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007416127249598503, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.006326653994619846, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005272434093058109, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.07766332477331161, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06822653859853745, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06262176483869553, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.055257685482501984, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03549393266439438, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03082115389406681, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.045342493802309036, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04130798205733299, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03649745509028435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.029695969074964523, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.028536811470985413, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0230239387601614, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.019743341952562332, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.017087290063500404, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01640263944864273, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.011528677307069302, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.008883095346391201, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.008389418013393879, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007728168740868568, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.007287120446562767, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006011857185512781, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005884784273803234, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005078885704278946, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0038329584058374166, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.18647685647010803, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16927868127822876, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16289541125297546, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.14355948567390442, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08701078593730927, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08053071796894073, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09971106052398682, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09167294204235077, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08840742707252502, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07390925288200378, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06947944313287735, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.050636786967515945, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04370897263288498, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04157717898488045, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04106442257761955, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02526116371154785, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02111922577023506, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.020625732839107513, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018418701365590096, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.018070437014102936, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012948362156748772, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012528599239885807, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01217536348849535, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007619223557412624, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2111624926328659, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1894221305847168, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18110843002796173, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.15569372475147247, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0991634801030159, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09094454348087311, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11558285355567932, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1053033322095871, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10077950358390808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08164139091968536, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07657422870397568, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05938185751438141, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05063106492161751, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04780473932623863, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04712039604783058, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029698148369789124, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.024948330596089363, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02434016764163971, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0214754156768322, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021022062748670578, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015709461644291878, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015857234597206116, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014771604910492897, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010999728925526142, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1697273701429367, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15933169424533844, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15582987666130066, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14171499013900757, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08014202862977982, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0764455571770668, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08954771608114243, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08243760466575623, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08092518150806427, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07166403532028198, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06867855042219162, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04576130211353302, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03955552354454994, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03852245956659317, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03827434405684471, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022923244163393974, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02009345218539238, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019846098497509956, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018411725759506226, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01826225407421589, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012264960445463657, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012537394650280476, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011918189004063606, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008828557096421719, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.22838224470615387, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21463695168495178, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21009284257888794, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19100460410118103, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10786176472902298, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10302219539880753, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12026579678058624, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11065361648797989, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1088922917842865, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09647785872220993, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09247337281703949, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06144704297184944, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.053014323115348816, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.051768284291028976, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05148192495107651, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.030769824981689453, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.026770593598484993, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02645113877952099, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02447945810854435, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024292392656207085, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01639411598443985, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016354579478502274, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01597750559449196, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011132118292152882, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22409436106681824, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20111288130283356, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19224001467227936, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17022448778152466, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10295462608337402, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09423749148845673, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12175288796424866, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11042642593383789, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10556995868682861, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08808054774999619, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08334790170192719, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06187070533633232, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05294247716665268, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04960000887513161, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04878103733062744, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03113480657339096, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02614271081984043, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02573545090854168, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02326771803200245, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.022735869511961937, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016963720321655273, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017013225704431534, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015857458114624023, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012126441113650799, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.08382079750299454, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07490450888872147, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07008219510316849, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.06146163493394852, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.03881646692752838, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.03466552495956421, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0477760024368763, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04362299665808678, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.03961646184325218, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0326077975332737, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.030992398038506508, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02425912581384182, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02085171826183796, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.018682800233364105, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.018138505518436432, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012132094241678715, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.009758278727531433, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.009352028369903564, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.008537715300917625, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.00818405020982027, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006375470664352179, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006392112001776695, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.005603625439107418, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004313592333346605, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.07394776493310928, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06535119563341141, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06080341711640358, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.053117599338293076, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03393620252609253, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.029974699020385742, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04221966862678528, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.038495101034641266, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.034717462956905365, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.028246568515896797, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.026828426867723465, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02141297422349453, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.018352029845118523, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.016290122643113136, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.015767395496368408, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.010709327645599842, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.008424929343163967, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.008026720955967903, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007283450104296207, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.006932819727808237, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005547204986214638, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005438525695353746, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004799036309123039, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003494705306366086, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.17731405794620514, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.15956775844097137, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.15307898819446564, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1338859498500824, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08261559158563614, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07586933672428131, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09579431265592575, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.08736433833837509, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0840839222073555, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.06933700293302536, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06501740217208862, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.048740509897470474, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04172192141413689, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.03955220431089401, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.039026424288749695, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02433861792087555, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.020210040733218193, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.019707826897501945, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.017485080286860466, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01712285727262497, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012564626522362232, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012204868718981743, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.011705592274665833, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007682432420551777, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19238552451133728, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17328593134880066, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.16548800468444824, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14515981078147888, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08985546231269836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08213897794485092, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10598412156105042, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09636495262384415, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09155091643333435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07591871917247772, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.071745865046978, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05460492894053459, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04641399905085564, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04342703893780708, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.042707063257694244, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.027361709624528885, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.022962380200624466, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02236245945096016, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02026987075805664, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.019798975437879562, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01478262897580862, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01504618301987648, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013833886943757534, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010851421393454075, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16586579382419586, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15491724014282227, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1510569155216217, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13677404820919037, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0780939906835556, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07416621595621109, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08778027445077896, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08073802292346954, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07894818484783173, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06938306987285614, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06626947969198227, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04481304809451103, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03872273489832878, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03754059970378876, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.037260860204696655, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022463329136371613, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019592085853219032, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019319092854857445, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017868636175990105, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017694562673568726, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012002049013972282, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012266989797353745, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011593643575906754, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008627205155789852, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.21927191317081451, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20501691102981567, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20017455518245697, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.18120037019252777, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10328393429517746, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09820196777582169, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11570967733860016, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.106389120221138, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10437608510255814, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09170317649841309, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08753415942192078, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05901452526450157, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05089648813009262, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04948923736810684, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04915770888328552, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02949897199869156, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.025476688519120216, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.025129230692982674, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023134447634220123, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02291840687394142, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015533601865172386, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015431102365255356, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015041684731841087, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010265933349728584, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2316984236240387, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20855079591274261, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1996135711669922, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17656828463077545, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10696493834257126, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0979786068201065, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12640605866909027, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11431660503149033, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10951480269432068, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09161880612373352, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08662746101617813, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0645790845155716, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05495795980095863, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05160621553659439, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05080379918217659, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03253709897398949, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027299748733639717, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02689879573881626, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024344176054000854, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023834699764847755, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01785573549568653, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017870739102363586, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01675860770046711, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01293095014989376, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.09443170577287674, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08417562395334244, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07880430668592453, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.06903362274169922, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04380365088582039, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.03915157914161682, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05384346842765808, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.048969194293022156, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.044732436537742615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03665376454591751, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03478416055440903, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.027366505935788155, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.023457041010260582, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.021156666800379753, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.020583955571055412, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013721113093197346, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.011162676848471165, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01073173712939024, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.009778901934623718, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.009409273974597454, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007292456459254026, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007428398355841637, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.006471557542681694, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00521785719320178, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.07725757360458374, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06842880696058273, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06313561648130417, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.055188242346048355, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03555299714207649, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.031188014894723892, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04488980397582054, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04090602323412895, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.036348097026348114, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.029595909640192986, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.028222065418958664, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02279287949204445, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01955324597656727, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01712164469063282, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01650315709412098, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.011394563131034374, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.008947236463427544, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.008494481444358826, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007792723830789328, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0073787453584373, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005976248532533646, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005954297259449959, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005133256781846285, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00398356094956398, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.18491458892822266, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16479486227035522, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.15728889405727386, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.13679884374141693, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08574974536895752, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07801947742700577, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10061930865049362, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09117120504379272, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08742667734622955, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07107211649417877, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06657279282808304, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05119408294558525, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.043519943952560425, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04103796184062958, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.0404149629175663, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.025615733116865158, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.020987749099731445, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.020405374467372894, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018015414476394653, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.017603188753128052, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013208218850195408, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01277595479041338, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.012105046771466732, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008085593581199646, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1983257532119751, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18068119883537292, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.16610652208328247, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14705632627010345, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09355044364929199, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0825854018330574, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12468340247869492, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11127574741840363, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09510099142789841, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08084557950496674, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07762949913740158, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06446586549282074, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.053816672414541245, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.045696645975112915, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04360395669937134, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03234070912003517, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02468540333211422, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023335494101047516, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022280992940068245, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.020970802754163742, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017369024455547333, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017589233815670013, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014564789831638336, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012772961519658566, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16621097922325134, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15485839545726776, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15070559084415436, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13643422722816467, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07828599214553833, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07408754527568817, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08841712772846222, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08122987300157547, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07919685542583466, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06935779005289078, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0663074404001236, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04529205337166786, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03905262053012848, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03774336725473404, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03742499276995659, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022727226838469505, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01986985094845295, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01957283355295658, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01812852919101715, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01793726533651352, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012285669334232807, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012701587751507759, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011841337196528912, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009194019250571728, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2195833921432495, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20502379536628723, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.19992052018642426, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.18091890215873718, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10338600724935532, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09805385768413544, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11613030731678009, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10677779465913773, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10451465845108032, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.091641366481781, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0873740091919899, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05909232795238495, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05106218531727791, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04953744262456894, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04917309805750847, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.029564935714006424, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02548827975988388, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02511754259467125, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023103680461645126, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02287266030907631, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015511621721088886, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015427171252667904, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.014973500743508339, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010211095213890076, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2346106767654419, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21152523159980774, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20273374021053314, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17964401841163635, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10861179977655411, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0997496172785759, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12767618894577026, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11571694165468216, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11123592406511307, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09325908124446869, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08826018124818802, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06543023884296417, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.055728767067193985, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05245055630803108, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05165478214621544, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03298131749033928, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02778705582022667, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027391791343688965, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024827569723129272, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024322863668203354, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01799144595861435, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.0182185098528862, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01687532849609852, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013239677995443344, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.09969858080148697, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.0884806215763092, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0819682776927948, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07184608280658722, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0461503341794014, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.040707435458898544, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05886714160442352, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.052735794335603714, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04723067209124565, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03860528767108917, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03708610683679581, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.030114438384771347, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.025405623018741608, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.022373896092176437, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.021607588976621628, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01516412477940321, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01194839645177126, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011408621445298195, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010512681677937508, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010028084740042686, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008143549785017967, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008209326304495335, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007004671264439821, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005909860134124756, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08347498625516891, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07350214570760727, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06769873201847076, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.05914417281746864, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03829905390739441, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.033474151045084, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0491379089653492, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04417207092046738, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.039252523332834244, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03181857988238335, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03045061230659485, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.025036271661520004, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02118666097521782, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.018499037250876427, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01782444305717945, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012554758228361607, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.009742962196469307, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009245016612112522, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008483266457915306, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008031640201807022, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006617433857172728, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00657306844368577, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005631443578749895, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00450543686747551, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.18325267732143402, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16474375128746033, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1573455035686493, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1378890573978424, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08548878878355026, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07815855741500854, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10108988732099533, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09120308607816696, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.087018221616745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07171642035245895, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06752173602581024, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.051445554941892624, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04358779266476631, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.040955498814582825, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04030605033040047, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02575431391596794, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.020966794341802597, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.020375117659568787, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018135808408260345, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01770651340484619, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013279383070766926, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012771866284310818, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.012184059247374535, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008066009730100632, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.21405917406082153, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1878739893436432, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17748111486434937, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.15510571002960205, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09982484579086304, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08950070291757584, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11961932480335236, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10782510787248611, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10240000486373901, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08196977525949478, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0777900442481041, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06179776415228844, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05217791721224785, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04852687194943428, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.047647714614868164, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031098896637558937, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02584069035947323, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.025077305734157562, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022378675639629364, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021797729656100273, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016953200101852417, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017189502716064453, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01577877253293991, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012593448162078857, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17029587924480438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15835104882717133, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1542235016822815, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13943049311637878, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08031564950942993, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07598040252923965, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09072379767894745, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08318567276000977, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0812821090221405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07099553942680359, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06795711070299149, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04650017246603966, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04007118195295334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03879011422395706, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.038490019738674164, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023354539647698402, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02053805999457836, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020248154178261757, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018730854615569115, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018547147512435913, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012742209248244762, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013272836804389954, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012322796508669853, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00978057086467743, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.22807498276233673, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2124272584915161, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20719891786575317, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.18730807304382324, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10743758827447891, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10183964669704437, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12040640413761139, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11072558909654617, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10866444557905197, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09496080875396729, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09064386039972305, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0614788718521595, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05302635580301285, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05154569447040558, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0511913001537323, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.030748829245567322, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.026634855195879936, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026261458173394203, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02411791682243347, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.023899728432297707, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016267908737063408, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01627732627093792, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015771547332406044, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011015129275619984, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2362738400697708, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21184071898460388, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20177243649959564, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17789152264595032, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10895924270153046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0992961898446083, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.129877969622612, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11765120178461075, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11181224882602692, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09281481057405472, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08776747435331345, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06630951166152954, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.056485261768102646, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05257375165820122, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.051624711602926254, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03341326117515564, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027802783995866776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02732454426586628, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024682452902197838, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024067843332886696, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018238916993141174, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018257183954119682, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016915414482355118, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01311061717569828, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10314715653657913, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09266220778226852, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08769583702087402, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0770677998661995, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04790990799665451, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04343116655945778, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.057418134063482285, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05249907448887825, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04881198704242706, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.040386348962783813, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03825260326266289, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02916886657476425, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.025087997317314148, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.023030465468764305, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.022522786632180214, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014583675190806389, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.011967253871262074, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011564635671675205, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010465950705111027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01013761106878519, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007642047945410013, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007648939732462168, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0068970490247011185, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005134908948093653, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08273298293352127, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07429055124521255, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06955701857805252, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.061148714274168015, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03826245293021202, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.034294549375772476, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04697607085108757, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.042906906455755234, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03898598998785019, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03225269168615341, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.030677540227770805, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02378532849252224, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.020472507923841476, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.018381714820861816, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017849797382950783, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.011893222108483315, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.009506274946033955, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009110584855079651, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008303189650177956, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00795774906873703, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006186879705637693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006099974270910025, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0054515958763659, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003966561518609524, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20046471059322357, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18022501468658447, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17295095324516296, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15152235329151154, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09320831298828125, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08567260205745697, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10813906788825989, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09824514389038086, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09484930336475372, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07815505564212799, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0734860748052597, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05497018247842789, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04686669260263443, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.044557489454746246, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04402301833033562, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02742289938032627, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.022670039907097816, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.022098813205957413, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019564546644687653, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.019188372418284416, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014062088914215565, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013510342687368393, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013065884821116924, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008290225639939308, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.22638680040836334, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20429110527038574, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1955026090145111, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.17497599124908447, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10595306009054184, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09680303186178207, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12393112480640411, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11297988146543503, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10775924474000931, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09028372913599014, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08640869706869125, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06375990808010101, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.054483458399772644, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.051272835582494736, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05051289498806, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03196769952774048, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027188334614038467, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026497943326830864, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02414804697036743, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.023657748475670815, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01722893677651882, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017870081588625908, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016199590638279915, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013031871058046818, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17345477640628815, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16146156191825867, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15716566145420074, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14213378727436066, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08186063915491104, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07743385434150696, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09255385398864746, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0848960131406784, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08283501863479614, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07242001593112946, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06927743554115295, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.047436244785785675, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0408552885055542, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.039486318826675415, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.039156120270490646, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023771269246935844, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02081483229994774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020506437867879868, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018969040364027023, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01876494660973549, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012831049971282482, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013343739323318005, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01235541608184576, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009692429564893246, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23230473697185516, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21649231016635895, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2112017720937729, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19084548950195312, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10950754582881927, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1038348525762558, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12291628122329712, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1129130944609642, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11071427166461945, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09685678035020828, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09233184903860092, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06273885816335678, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.054055772721767426, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05251975730061531, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.052145201712846756, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031385745853185654, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027087995782494545, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026710104197263718, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024536943063139915, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02430691570043564, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01659790240228176, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016486115753650665, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0160568468272686, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01105949841439724, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2454221248626709, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21893522143363953, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.2086254358291626, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18350233137607574, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11311669647693634, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10283038020133972, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13425275683403015, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12158705294132233, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11617622524499893, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09581925719976425, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09049540758132935, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06871403753757477, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05842313915491104, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05457408353686333, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05362478271126747, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.034635864198207855, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028789693489670753, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.028333254158496857, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02543419413268566, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024832725524902344, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018880851566791534, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01879916898906231, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01762140542268753, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013455724343657494, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10427312552928925, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09402746707201004, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08832471072673798, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07799983024597168, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.048503775149583817, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04372416436672211, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05961129069328308, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05416461080312729, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04941970109939575, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04119741544127464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03929877653717995, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.030308803543448448, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.025920258834958076, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.023394513875246048, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.022764064371585846, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015165645629167557, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012289806269109249, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011814014986157417, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010859169997274876, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01044966746121645, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007996204309165478, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008103515021502972, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007076940964907408, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0056046550162136555, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08214837312698364, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07430090010166168, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06839752197265625, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06053515896201134, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03798629716038704, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03354503959417343, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04896119609475136, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04453954100608826, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0387175977230072, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03253249078989029, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03123631328344345, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.024768522009253502, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02126140333712101, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01829349994659424, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017552081495523453, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012370768003165722, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.009557977318763733, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009037826210260391, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00847303494811058, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.007983924821019173, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0064789592288434505, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006398770958185196, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005472925957292318, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004222698975354433, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2082284390926361, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18806049227714539, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18018662929534912, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15821576118469238, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09706511348485947, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08920662105083466, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11444617062807083, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10297956317663193, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09873464703559875, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0819215327501297, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07727549970149994, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05842583626508713, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04922682046890259, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04652223736047745, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.045849546790122986, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029185442253947258, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02384675294160843, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.023221757262945175, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020791180431842804, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020360007882118225, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015076414681971073, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014548826962709427, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013834626413881779, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009344098158180714, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2402418553829193, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20962084829807281, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1979014277458191, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.17642231285572052, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11268927901983261, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10056629031896591, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13443301618099213, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12135133892297745, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.1154353991150856, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09238894283771515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08860884606838226, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0691382884979248, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0585823617875576, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05462785065174103, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05366610735654831, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03462376073002815, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.028894444927573204, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.028077371418476105, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024951206520199776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.024320686236023903, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018494633957743645, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018997348845005035, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017229389399290085, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01369703933596611, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16121050715446472, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15031841397285461, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14603757858276367, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13245393335819244, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07618197053670883, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07199965417385101, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08663749694824219, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07953400909900665, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07710562646389008, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06762813031673431, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06478162854909897, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04441314935684204, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.038338664919137955, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03683697059750557, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03648356720805168, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022275563329458237, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01953328400850296, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019226476550102234, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.0178776104003191, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01765921153128147, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012047053314745426, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012705329805612564, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011531362310051918, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00936448946595192, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.22859549522399902, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21371938288211823, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20843705534934998, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.18889813125133514, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10806606709957123, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10262097418308258, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12158344686031342, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11170445382595062, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1092282384634018, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09599892795085907, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0917070060968399, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06212596595287323, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05353396758437157, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05189209803938866, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.051500797271728516, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03110170178115368, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02691294625401497, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026530122384428978, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024501154199242592, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024252276867628098, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016560792922973633, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016607576981186867, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015985634177923203, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011380678042769432, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23192688822746277, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20644867420196533, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.196041077375412, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17321330308914185, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10686159878969193, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09672389924526215, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1286449432373047, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11581127345561981, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.109905906021595, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.090827576816082, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08614902198314667, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06591575592756271, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.056096151471138, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.051967278122901917, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05093535780906677, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03355484828352928, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02812705561518669, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027587631717324257, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025134554132819176, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02445467747747898, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018812844529747963, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019305411726236343, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017296696081757545, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.014617489650845528, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11810654401779175, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10746994614601135, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10159266740083694, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09017832577228546, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05513117462396622, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.050087399780750275, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06753953546285629, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06116047501564026, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05602053552865982, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04738832637667656, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.045286569744348526, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03439159318804741, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029369279742240906, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02664126083254814, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025970090180635452, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017308805137872696, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014100560918450356, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01360301487147808, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012595172971487045, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012171436101198196, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00922522321343422, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009386211633682251, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008224603720009327, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006668503396213055, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08741243183612823, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07944466918706894, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07288934290409088, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06483582407236099, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04034798592329025, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.035533733665943146, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05296310409903526, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04795738682150841, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.041109006851911545, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03491437807679176, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03376104682683945, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0269004013389349, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02295541949570179, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01946965977549553, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.018572822213172913, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013470124453306198, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01020247582346201, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.00960378348827362, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00913182646036148, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008556913584470749, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007040910888463259, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006936254445463419, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0058271815069019794, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0046105897054076195, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22268299758434296, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2044656127691269, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1969924420118332, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17643733322620392, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10476750880479813, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09758922457695007, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12197482585906982, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11078139394521713, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10635611414909363, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09081462770700455, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08650228381156921, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.062153782695531845, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05293874815106392, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05022347345948219, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04954543337225914, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.031092053279280663, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.025633109733462334, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025056159123778343, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022755004465579987, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022324338555336, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01601690612733364, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01542789675295353, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01490340381860733, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009662067517638206, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2205522060394287, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19823704659938812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18997353315353394, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16209402680397034, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10296060889959335, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09405341744422913, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12023609131574631, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10915643721818924, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.1048969104886055, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0853348821401596, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07868047058582306, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06168187037110329, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05237152427434921, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.049558110535144806, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04888352379202843, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.030882887542247772, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02581344172358513, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02515515871345997, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022303326055407524, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0218677781522274, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0163813978433609, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016314946115016937, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015451745130121708, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011265052482485771, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1603129655122757, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1501213163137436, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14617516100406647, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13270100951194763, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07582269608974457, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07194183021783829, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08590879291296005, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07892554253339767, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07667091488838196, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06768853217363358, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0648263469338417, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04399040341377258, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0380462221801281, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.036653630435466766, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0363280326128006, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022099019959568977, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019437076523900032, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019152706488966942, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017855914309620857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01765860989689827, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012002876028418541, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012620079331099987, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01153854001313448, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00930857378989458, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23206652700901031, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21808291971683502, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21315337717533112, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19359655678272247, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10968521237373352, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10460637509822845, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12284474819898605, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11304309964179993, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11075493693351746, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09806343168020248, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09377837926149368, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06258417665958405, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05406079441308975, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05254649370908737, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05219108983874321, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03126547858119011, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.026954518631100655, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026593785732984543, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02457580342888832, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024349931627511978, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01635885238647461, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016163846477866173, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015837937593460083, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010536919347941875, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22662843763828278, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20094841718673706, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19032028317451477, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16810643672943115, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10403107106685638, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09366058558225632, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.124934621155262, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11322349309921265, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1070873960852623, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08781901001930237, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08328627794981003, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06373225152492523, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.054396070539951324, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05021306499838829, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.049189310520887375, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.032220229506492615, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.026582196354866028, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.026076463982462883, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02345900610089302, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02280079759657383, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0177666824311018, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017569463700056076, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016412856057286263, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012619685381650925, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11625315248966217, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10644413530826569, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10139557719230652, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0905100405216217, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05422988533973694, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.049899518489837646, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06484952569007874, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05915398150682449, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0550224594771862, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.046956684440374374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04479210451245308, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03294018656015396, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028229698538780212, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02605895884335041, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02552204579114914, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016504067927598953, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013541745021939278, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013128760270774364, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012094467878341675, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01174709852784872, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008676937781274319, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008624349720776081, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007871933281421661, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005788940470665693, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09024164825677872, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08264939486980438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07677725702524185, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06867213547229767, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04172584041953087, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03738897666335106, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05285538733005524, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.048555899411439896, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04241763800382614, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03641069680452347, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03497659042477608, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.026684127748012543, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02309168316423893, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.020039938390254974, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019264383241534233, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013369033113121986, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010372443124651909, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009840160608291626, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00930831115692854, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008801004849374294, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006949161645025015, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006800277158617973, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005921018775552511, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0043354518711566925, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22763000428676605, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2108629047870636, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20416127145290375, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1836906373500824, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10715635120868683, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10055676102638245, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12357952445745468, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11212702095508575, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10853159427642822, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09393472969532013, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08958781510591507, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0630539134144783, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.053611766546964645, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.051275089383125305, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.0507177896797657, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.031517982482910156, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02607744373381138, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025546735152602196, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023311972618103027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02295251376926899, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016153190284967422, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015434733591973782, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015159480273723602, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009445898234844208, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2316020429134369, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20882944762706757, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20107635855674744, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.17723780870437622, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10790537297725677, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10007428377866745, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12435402721166611, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11345770955085754, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11013765633106232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09169118106365204, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08610059320926666, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06392283737659454, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05483512952923775, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05221908539533615, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.051610544323921204, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03226745128631592, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027735183015465736, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02720281109213829, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024575250223279, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.024188410490751266, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017655177041888237, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01814938150346279, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016838565468788147, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013353047892451286, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.14768685400485992, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13834983110427856, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13458245992660522, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12242145836353302, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06982677429914474, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0662149041891098, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07956953346729279, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07298225164413452, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07058662176132202, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06242915615439415, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05987893417477608, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04072605445981026, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.035149380564689636, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03372518718242645, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.033384375274181366, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02043192647397518, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.017837753519415855, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01755319908261299, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01639324612915516, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016187231987714767, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01105471421033144, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011546866968274117, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010578898712992668, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008431646041572094, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.22460298240184784, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21140053868293762, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2065916806459427, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.18786993622779846, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10620207339525223, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10134615749120712, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11919639259576797, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10968390852212906, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10717424005270004, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09514002501964569, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09112237393856049, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06078305095434189, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.052460432052612305, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05089724436402321, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05052942410111427, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.030381541699171066, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.026160679757595062, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.025804171338677406, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02391396462917328, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.023677408695220947, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01599995791912079, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015796083956956863, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015466493554413319, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010394919663667679, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2189044952392578, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1922893226146698, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18088498711585999, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15940162539482117, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09991830587387085, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08904881030321121, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12124425917863846, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11010909080505371, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10328947752714157, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08374492824077606, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07961218059062958, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.061941009014844894, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05294622480869293, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04831071197986603, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.047152262181043625, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.031311918050050735, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025654250755906105, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025094307959079742, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022545961663126945, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021801510825753212, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01719447411596775, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017160262912511826, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.0156688392162323, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01235493365675211, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.09877045452594757, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09050572663545609, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08519799262285233, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07652111351490021, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04591839015483856, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.041729968041181564, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05691666528582573, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05187518149614334, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04675619304180145, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04019087925553322, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03861352056264877, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02890949137508869, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02481020800769329, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02212608978152275, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.021456735208630562, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014470353722572327, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.011592083610594273, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011131991632282734, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010463307611644268, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010037378408014774, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007591764442622662, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007650955114513636, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.006635086610913277, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0052027590572834015, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08153204619884491, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07427710294723511, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06846076995134354, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06160712242126465, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03758430480957031, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03328463435173035, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.048911627382040024, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.044538941234350204, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0383358970284462, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03287086263298988, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03187744319438934, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.024759147316217422, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02122410759329796, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01808411441743374, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017268288880586624, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012365500442683697, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.009439664892852306, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.008903466165065765, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00851142406463623, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.007994170300662518, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006470291875302792, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006350566167384386, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005362414754927158, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004153600428253412, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.1897255778312683, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17148719727993011, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1622413843870163, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.14592459797859192, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0874619260430336, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07928943634033203, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.107901431620121, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09702086448669434, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0895102322101593, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07586140185594559, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07301785051822662, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05492434650659561, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04627428203821182, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04189933091402054, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.040816161781549454, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027473272755742073, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.021501801908016205, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.020680952817201614, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019145779311656952, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0184245053678751, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014121237210929394, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013485418632626534, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.012304980307817459, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008454009890556335, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2206706553697586, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1967741847038269, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18698368966579437, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16055934131145477, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10307781398296356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09351229667663574, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12318483740091324, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11146533489227295, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10549665987491608, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08492764830589294, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0804290845990181, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0634545087814331, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05384718254208565, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.050045814365148544, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04911615699529648, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031868692487478256, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02668195404112339, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02592478320002556, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023225370794534683, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022614384070038795, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017264796420931816, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017833106219768524, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016049150377511978, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013130603358149529, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15048572421073914, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1409088671207428, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13702061772346497, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1248074620962143, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07097269594669342, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0673084482550621, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08077413588762283, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07417359203100204, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07179903984069824, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06352850794792175, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06094203516840935, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0412365086376667, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03568931668996811, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03424982726573944, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03391079977154732, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020729966461658478, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018080255016684532, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01779109239578247, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016631053760647774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016418393701314926, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011233022436499596, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011655177921056747, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010749422013759613, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00845804251730442, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23029911518096924, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21674178540706635, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21187521517276764, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19278527796268463, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10877099633216858, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10390008985996246, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12215501815080643, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11233453452587128, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10984280705451965, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09757495671510696, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09366217255592346, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.062383007258176804, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.053846608847379684, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05224813148379326, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.051869675517082214, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03133733198046684, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027112659066915512, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026751894503831863, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02487032674252987, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024636920541524887, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016776887699961662, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016712449491024017, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016253389418125153, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011466573923826218, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23119837045669556, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20182359218597412, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18955586850643158, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16660253703594208, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10561040043830872, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09378531575202942, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12916524708271027, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11628955602645874, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10928861796855927, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08794156461954117, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08346576243638992, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06583962589502335, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.055974967777729034, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.051116250455379486, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04992622882127762, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.033259063959121704, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027270788326859474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.026691725477576256, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023899374529719353, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023125194013118744, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01827959343791008, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018375609070062637, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01666593737900257, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01340460404753685, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11182929575443268, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1030770018696785, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09758727252483368, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08759299665689468, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05225271359086037, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0478707030415535, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06432883441448212, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05842462554574013, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05309903621673584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04588747397065163, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04407499358057976, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03280595690011978, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028058858588337898, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025268102064728737, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.024573305621743202, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016484539955854416, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013382850214838982, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012908383272588253, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012151101604104042, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011722324416041374, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00880486611276865, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008959541097283363, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0078038983047008514, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006360235624015331, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08977974206209183, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08149825036525726, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0743309035897255, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06701599061489105, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04125424847006798, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03613049164414406, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.054562319070100784, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04990191385149956, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04219750314950943, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03611712530255318, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03513212129473686, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.027601197361946106, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.023752205073833466, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.019899507984519005, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.018889419734477997, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013796602375805378, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010396825149655342, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009738895110785961, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009385932236909866, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008741644211113453, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007194052450358868, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007094561122357845, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005861023440957069, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004631702788174152, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2039724588394165, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18503795564174652, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17559181153774261, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1587192863225937, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0949382409453392, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08643919229507446, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11638972908258438, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10419449210166931, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09712060540914536, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08250969648361206, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07958784699440002, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0595138818025589, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04980595409870148, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04560091346502304, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04455127567052841, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029768047854304314, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02337818779051304, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.022551150992512703, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02082766219973564, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020135099068284035, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015367455780506134, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014537383802235126, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013519229367375374, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009115793742239475, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19369585812091827, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17511457204818726, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.168751060962677, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14461973309516907, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08988325297832489, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08331980556249619, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10412181913852692, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09457297623157501, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09136306494474411, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07550716400146484, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07025912404060364, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05361134558916092, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04585522413253784, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.043744929134845734, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04322652891278267, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02713937498629093, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02361323870718479, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023146843537688255, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020918138325214386, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.020604180172085762, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015226068906486034, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015959369018673897, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014560260809957981, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01227539125829935, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.13946247100830078, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.130340114235878, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12653960287570953, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11520880460739136, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06572086364030838, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06217227876186371, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0750579982995987, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06901968270540237, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06653901189565659, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05877039581537247, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05640740320086479, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03835660219192505, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03318726643919945, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03171948716044426, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03136537969112396, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.019230205565690994, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.0167094599455595, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01642044447362423, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.015342725440859795, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.015124650672078133, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010352285578846931, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.010743359103798866, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.009859924204647541, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007734288461506367, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.21882693469524384, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20562207698822021, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20072756707668304, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.182657390832901, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10324682295322418, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09837091714143753, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11650099605321884, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10699096322059631, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10431504249572754, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09258496761322021, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08881419152021408, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05947325378656387, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05126851424574852, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04957153648138046, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04916631430387497, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02981090359389782, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02563614957034588, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02527369000017643, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023490775376558304, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.023236596956849098, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015927450731396675, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015728773549199104, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015361386351287365, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010638182982802391, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2180885672569275, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18847060203552246, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.175281822681427, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1536332666873932, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09905749559402466, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08681830018758774, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12374965846538544, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11120553314685822, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.102986641228199, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08201327919960022, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07808101177215576, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06339702755212784, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.053619157522916794, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.048099663108587265, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04670684039592743, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03222019597887993, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02589360810816288, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025238776579499245, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02269287407398224, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02181011065840721, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018039902672171593, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017879707738757133, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016229214146733284, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013240624219179153, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11583327502012253, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10707373917102814, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10138831287622452, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09158512949943542, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05410749465227127, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.049634069204330444, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06659913808107376, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0606498084962368, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05499175563454628, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04787994176149368, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.046241629868745804, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03392355516552925, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02905263938009739, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026114564388990402, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025379573926329613, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016998974606394768, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01372106559574604, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013221810571849346, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012502269819378853, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012046173214912415, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008998067118227482, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00907168909907341, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007944890297949314, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0062825437635183334, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08844833076000214, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0810558870434761, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07388374209403992, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0668981671333313, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.040779951959848404, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03580733388662338, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05431085824966431, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04968656226992607, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04163135588169098, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.036048054695129395, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03519881144165993, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.027503617107868195, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02362765744328499, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.019666705280542374, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.018596278503537178, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013766063377261162, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010239262133836746, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009569879621267319, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009314735420048237, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008641399443149567, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007190791890025139, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006983542814850807, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005837661679834127, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004479460883885622, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.21553345024585724, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1968749612569809, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18854184448719025, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17050445079803467, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1004927009344101, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09267857670783997, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12054571509361267, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10843360424041748, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10238721966743469, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08790368586778641, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08452318608760834, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06140941381454468, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05180411785840988, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.048150692135095596, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.047241002321243286, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.030695302411913872, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024530237540602684, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.023806096985936165, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021944886073470116, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02134823054075241, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01572764292359352, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01493045687675476, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014083285816013813, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009159635752439499, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.21016933023929596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1918315440416336, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18559961020946503, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16056515276432037, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09833826869726181, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09173621237277985, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11375313997268677, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1024971455335617, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09965857118368149, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08330202102661133, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0778701901435852, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.058579642325639725, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04984389618039131, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.047980304807424545, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04756026715040207, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029636045917868614, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.0260698813945055, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.025636324658989906, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023235926404595375, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022970912978053093, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016626788303256035, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01769597828388214, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016061246395111084, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013807401061058044, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.13570255041122437, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1269463747739792, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1234181821346283, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11235389858484268, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06392691284418106, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06055749952793121, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07272753864526749, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06694991886615753, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06470496952533722, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05718860775232315, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05485624819993973, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.037105727940797806, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03215917944908142, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.030804991722106934, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.030487241223454475, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01860419660806656, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01617751456797123, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.015908779576420784, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.014851527288556099, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.014651603065431118, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.009973438456654549, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.0103243263438344, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.009520941413939, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007368143647909164, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.21526940166950226, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20227345824241638, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1974198967218399, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.17981858551502228, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10161511600017548, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09684250503778458, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11445416510105133, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10531377047300339, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10268446803092957, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09116989374160767, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08737434446811676, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.058392513543367386, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05048692598938942, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.048852015286684036, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04845467582345009, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.029287178069353104, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.025401784107089043, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.025047725066542625, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023310625925660133, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.023062486201524734, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015714703127741814, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015781814232468605, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015163782984018326, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010913262143731117, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.20467427372932434, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.17619039118289948, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.16318225860595703, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.14219556748867035, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09315599501132965, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08092720061540604, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11630266904830933, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10493998974561691, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09681819379329681, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07629824429750443, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07252518087625504, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05950430408120155, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05075840279459953, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.045337237417697906, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04398425668478012, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030292974784970284, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024593783542513847, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.023907123133540154, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021465353667736053, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02058359421789646, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01695265620946884, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017206920310854912, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015145739540457726, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012901893816888332, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11386986821889877, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.104779914021492, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09807874262332916, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08901209384202957, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05302050709724426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.047913502901792526, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0674378052353859, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0611175000667572, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05399857461452484, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.046964824199676514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04553931578993797, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.034310486167669296, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029245149344205856, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02562221884727478, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02470274269580841, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01722121797502041, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01351538859307766, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012905417941510677, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012328866869211197, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01175487320870161, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009158924221992493, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009117044508457184, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007861307822167873, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006289958488196135, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08913524448871613, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08081883192062378, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07211896777153015, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06549379974603653, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04079464077949524, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03478563204407692, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05678969621658325, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.051586080342531204, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.041818298399448395, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03589018061757088, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.035368338227272034, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02866927534341812, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02447575144469738, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.019704755395650864, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.018393073230981827, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014347347430884838, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010317577049136162, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009489393793046474, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00936061330139637, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00853648316115141, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007482944522053003, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007243580650538206, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005807059817016125, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004649898502975702, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2111341953277588, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19032922387123108, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18039970099925995, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16295936703681946, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09753607213497162, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08847400546073914, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11932147294282913, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10736395418643951, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09984095394611359, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08446913212537766, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08139351010322571, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06096519157290459, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05117689445614815, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04671149700880051, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.045638054609298706, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.030496232211589813, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.023929186165332794, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.023069996386766434, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021253589540719986, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02053392492234707, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015693753957748413, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014871499501168728, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013717856258153915, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009267570450901985, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23137788474559784, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2019561231136322, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19182270765304565, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16362103819847107, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10790043324232101, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09648412466049194, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12573277950286865, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11424893140792847, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11017107218503952, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08620648086071014, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08011271804571152, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06470062583684921, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05519891530275345, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05230967700481415, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05160555616021156, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03257102146744728, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027933228760957718, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.027207626029849052, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0237765870988369, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.023320335894823074, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017764857038855553, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01851288229227066, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01684955507516861, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013792848214507103, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.14012955129146576, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13159038126468658, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12827935814857483, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11676706373691559, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06610369682312012, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06289344280958176, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0745052844285965, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06869392842054367, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0668029710650444, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.059247080236673355, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.056752562522888184, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03797600418329239, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03293614089488983, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03177846223115921, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.031506385654211044, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.019005902111530304, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.016547366976737976, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.016306929290294647, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.015188408084213734, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.015018881298601627, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010129187256097794, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.010343268513679504, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.009741497226059437, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0072053358890116215, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.22427260875701904, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21108117699623108, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20644553005695343, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.18801483511924744, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10573598742485046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1010100245475769, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11835017800331116, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10896846652030945, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10672338306903839, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0949537381529808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.090968057513237, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06022365391254425, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05212712287902832, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05067984387278557, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05033496022224426, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.030190814286470413, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.026062065735459328, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02572040632367134, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023873617872595787, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02365795522928238, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016038386151194572, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015723146498203278, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015556741505861282, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01039417926222086, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2037672996520996, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1765882521867752, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.16449373960494995, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.14366264641284943, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09273632615804672, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08121628314256668, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11376739293336868, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10354887694120407, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09617358446121216, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07636725157499313, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07240080833435059, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05824919417500496, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04980819672346115, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.044844064861536026, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04360299929976463, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02952548861503601, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02386907860636711, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.023262634873390198, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.020743656903505325, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.019946372136473656, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016309401020407677, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016130097210407257, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014701448380947113, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011624455451965332, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11007949709892273, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10057340562343597, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09381993860006332, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08518403023481369, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.051128461956977844, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.045969828963279724, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06524138152599335, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.059073299169540405, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.052201006561517715, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04504309967160225, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04374602064490318, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03323236480355263, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02823152206838131, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024680133908987045, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.023778583854436874, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016678251326084137, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012961012311279774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012355546467006207, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011765041388571262, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011199790053069592, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008884131908416748, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008693182840943336, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007613930851221085, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005909163970500231, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09066658467054367, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0819830670952797, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07341285794973373, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06668633967638016, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04157867655158043, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03555874153971672, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05737684667110443, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.052111100405454636, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.042687688022851944, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.036528028547763824, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03596767410635948, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02913474477827549, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.024831030517816544, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.0200941264629364, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.018805328756570816, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014512252062559128, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01053597405552864, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009722597897052765, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009543577209115028, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008731362409889698, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007540490012615919, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007382043171674013, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00584819633513689, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004775067791342735, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20227202773094177, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18053899705410004, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.169255793094635, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15265384316444397, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09283372759819031, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08292803168296814, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11670874804258347, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10447116196155548, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09543715417385101, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07988022267818451, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07716304063796997, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.059729427099227905, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.049828313291072845, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.044489260762929916, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04310982674360275, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029865743592381477, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.022761661559343338, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02175484225153923, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020123081281781197, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.019233381375670433, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015325743705034256, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014339233748614788, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.012959977611899376, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008804630488157272, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23164187371730804, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20583802461624146, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19687512516975403, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1679290384054184, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10806269198656082, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09802795946598053, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1237257644534111, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11396560817956924, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10991548746824265, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08751168847084045, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.081719771027565, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06345896422863007, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05475727096199989, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0520641915500164, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05141787976026535, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031839799135923386, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02726016938686371, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026598256081342697, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023207323625683784, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022767512127757072, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01702200248837471, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017360826954245567, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016122965142130852, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012221762910485268, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1399281919002533, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1316526085138321, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12857499718666077, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1170850321650505, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06608125567436218, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0630267783999443, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07417293637990952, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06838914752006531, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06672026962041855, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05930225923657417, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05681172385811806, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03785325214266777, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03279362991452217, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03175339102745056, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.031510043889284134, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.018962737172842026, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.016519729048013687, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01629602536559105, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01517000887542963, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.015016024000942707, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010138321667909622, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.010274902917444706, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.00979657843708992, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007135632913559675, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23137488961219788, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21816226840019226, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21365465223789215, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1946396678686142, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10929414629936218, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10458925366401672, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.121950663626194, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11234840005636215, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11028913408517838, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09827576577663422, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09407275170087814, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06223073601722717, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05374618619680405, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.052380725741386414, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05206525698304176, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031157808378338814, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02693590521812439, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.0266160536557436, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024697668850421906, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024497997015714645, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016595659777522087, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016235284507274628, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016144949942827225, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010747279971837997, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.20191875100135803, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.17489905655384064, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.16269470751285553, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1427871137857437, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09159678965806961, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08011201024055481, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11346875131130219, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10282294452190399, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09517670422792435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07575319707393646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07198146730661392, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0575299933552742, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04933876544237137, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04424488544464111, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04296950250864029, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0292698722332716, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02345464378595352, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02281699888408184, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0204042736440897, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.019559873268008232, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016342872753739357, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015764201059937477, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014736116863787174, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01116358395665884, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11153674125671387, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10144512355327606, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09441494196653366, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08563920855522156, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05170051380991936, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.046263743191957474, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06611791998147964, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06002819910645485, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05294734239578247, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04543847218155861, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04421873763203621, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03375137597322464, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028734564781188965, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02497200295329094, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.024009179323911667, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016945242881774902, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013143938034772873, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01250715646892786, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011915127746760845, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011312623508274555, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00900398101657629, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008873041719198227, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007644422817975283, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0060476516373455524, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09825710207223892, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0888645276427269, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07982084155082703, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07245945930480957, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.045194532722234726, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.038760218769311905, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06215694919228554, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05635787546634674, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0463859960436821, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03967444971203804, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0390038900077343, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03149653598666191, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026887480169534683, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.021876554936170578, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02052072249352932, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01575939916074276, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011503715068101883, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010646737180650234, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010418562218546867, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009576628915965557, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008232665248215199, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00806919950991869, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006468518637120724, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005304113496094942, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20236685872077942, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1797991394996643, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1675366759300232, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15106558799743652, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09267562627792358, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08208402246236801, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11766549944877625, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10564128309488297, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09541428834199905, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07950829714536667, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0768720880150795, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05997682735323906, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.050325922667980194, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.044448815286159515, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04295903816819191, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029955781996250153, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.022799311205744743, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.021688038483262062, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020125648006796837, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01913684420287609, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01537517923861742, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014536313712596893, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01292384508997202, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008974029682576656, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2121192067861557, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18757493793964386, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1783798485994339, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14879441261291504, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09886118769645691, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08982756733894348, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11563540995121002, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10572434216737747, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10107208788394928, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08019638806581497, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0721021220088005, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05923645570874214, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.050926968455314636, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04783276095986366, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.047072507441043854, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02971392311155796, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02528201974928379, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.024601222947239876, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02166849561035633, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021164795383810997, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015968337655067444, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01654115319252014, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014967258088290691, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011921115219593048, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15176910161972046, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14280806481838226, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13952085375785828, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12708163261413574, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07163389027118683, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06835954636335373, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08020627498626709, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0740974098443985, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07232995331287384, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06428278982639313, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06158166006207466, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04087895527482033, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03547118231654167, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03436512500047684, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.034101247787475586, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020437724888324738, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.017771992832422256, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.017524057999253273, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016294267028570175, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016129903495311737, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010794596746563911, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.010895284824073315, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010432912036776543, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007383113726973534, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23455262184143066, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22119948267936707, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21665704250335693, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1973171830177307, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11069883406162262, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10596682131290436, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12311770021915436, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11375848203897476, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11169003695249557, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09952817112207413, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09528804570436478, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06268686801195145, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05437982827425003, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05298984795808792, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.052664343267679214, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031301677227020264, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027153408154845238, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026826247572898865, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02487253025174141, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024669267237186432, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01638175919651985, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01621926762163639, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015915334224700928, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010529085993766785, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2054552584886551, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.17939750850200653, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.16737207770347595, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.14761805534362793, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09351819008588791, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08237604796886444, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11569984257221222, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10480454564094543, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09694115817546844, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07808305323123932, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07438566535711288, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05874088406562805, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05031457543373108, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.045227937400341034, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04394499957561493, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029837297275662422, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02404067851603031, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.023405233398079872, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021080534905195236, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02025834284722805, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01656145229935646, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01621769368648529, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01492338627576828, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011613169685006142, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.113249771296978, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1035972386598587, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09700480848550797, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08809350430965424, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.052688222378492355, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0473879873752594, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06664001941680908, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060373615473508835, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.053733933717012405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.046359237283468246, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04499833658337593, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.033891383558511734, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028858384117484093, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025393513962626457, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02451251819729805, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016972927376627922, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013243217952549458, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012616361491382122, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011981252580881119, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011420896276831627, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008964446373283863, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008735286071896553, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007726333104074001, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005786603316664696, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09667648375034332, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08724742382764816, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07945404946804047, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07210838049650192, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.044429294764995575, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.038640640676021576, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.059239163994789124, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05384540930390358, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.045548394322395325, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0388762392103672, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0379953570663929, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03003644198179245, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02566029131412506, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02141629159450531, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.020320570096373558, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015002036467194557, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01117615308612585, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010436972603201866, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010085671208798885, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00937290396541357, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007839668542146683, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007622672710567713, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006342748645693064, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0049221510998904705, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.216252863407135, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19575235247612, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18577443063259125, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16780352592468262, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1000417023897171, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09099473059177399, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12168221175670624, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11012381315231323, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10228852927684784, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08704881370067596, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08370888233184814, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.061925821006298065, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.052572108805179596, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04797527194023132, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04683469980955124, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03092818520963192, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024543458595871925, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.023654993623495102, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021860377863049507, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.021119724959135056, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015892762690782547, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015226701274514198, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01400633156299591, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009435578249394894, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19816461205482483, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17785672843456268, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17100326716899872, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14548331499099731, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09197153151035309, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08484116196632385, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10602975636720657, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09703768044710159, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09391459077596664, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0758638009428978, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07028453052043915, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05425082519650459, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04669787362217903, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.044380538165569305, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04381817951798439, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02720009535551071, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.023359142243862152, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.022856589406728745, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020210497081279755, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01986124739050865, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014645771123468876, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015064653940498829, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013907764106988907, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010812886990606785, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1561332643032074, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1468215435743332, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1434822380542755, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13057735562324524, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07365734130144119, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07027433067560196, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08259685337543488, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07612086832523346, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07435625046491623, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06606452912092209, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06330893188714981, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0420573391020298, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03647363558411598, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03536314144730568, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.035103727132081985, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021100470796227455, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018345044925808907, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01809520088136196, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01682484894990921, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01665952242910862, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011299819685518742, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011319603770971298, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010941967368125916, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007762602996081114, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23765720427036285, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22390970587730408, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21925973892211914, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19959956407546997, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11214753240346909, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10732576996088028, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12470567971467972, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11524657160043716, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11318576335906982, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10073353350162506, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0963207557797432, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06347717344760895, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05510848015546799, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05370846390724182, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05337778851389885, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031732164323329926, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027519794180989265, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02718656323850155, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025192415341734886, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024977121502161026, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016630712896585464, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016442488878965378, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01616118848323822, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010695071890950203, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.20826521515846252, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18110692501068115, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.16851425170898438, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.14942969381809235, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.0946975126862526, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08298242837190628, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11611378937959671, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10640634596347809, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09825602918863297, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07906821370124817, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07548008859157562, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05928756296634674, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0511324517428875, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04581145569682121, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04446573555469513, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029911644756793976, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024365006014704704, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02370441146194935, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021375909447669983, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02051452547311783, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016173547133803368, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01647864654660225, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014401617459952831, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011806026101112366, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1181766539812088, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10860254615545273, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10189425200223923, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09247363358736038, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05510542914271355, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04982056841254234, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06930215656757355, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06307578086853027, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.056132204830646515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0486709363758564, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04725896567106247, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.035291947424411774, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030188072472810745, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026562843471765518, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02565760910511017, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01766805723309517, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013894339092075825, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013281421735882759, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012622766196727753, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012051211670041084, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009289833717048168, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009214418940246105, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00800435058772564, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006192303728312254, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10241174697875977, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0934893935918808, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08488821983337402, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07719072699546814, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04735136777162552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04125446081161499, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06375835090875626, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.057971641421318054, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04840519651770592, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.041780441999435425, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04099755734205246, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03231370449066162, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027644705027341843, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.022850360721349716, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021577807143330574, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016175124794244766, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011929807253181934, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011105483397841454, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010831520892679691, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010016688145697117, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008427566848695278, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008194172754883766, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006752198562026024, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005272137001156807, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22474052011966705, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20521540939807892, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19633303582668304, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17791606485843658, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1046203151345253, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09635035693645477, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12470895797014236, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1132550984621048, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10667578876018524, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09164419025182724, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08792256563901901, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06338196247816086, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0540006160736084, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05010771378874779, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.049140699207782745, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03163979947566986, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.025575414299964905, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.024822410196065903, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022919774055480957, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022296814247965813, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016248993575572968, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015643270686268806, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014634845778346062, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009690629318356514, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.18407666683197021, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16712762415409088, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15819714963436127, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13141915202140808, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08660665154457092, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07884170114994049, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10429602861404419, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09562557935714722, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0874996930360794, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07158704102039337, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06502696871757507, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.053275320678949356, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04585926607251167, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04174739867448807, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.040729742497205734, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02665254846215248, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.021739136427640915, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.020963657647371292, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.018752221018075943, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01807243376970291, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014031779952347279, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014025110751390457, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01263608131557703, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009428977966308594, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15594026446342468, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14666114747524261, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1432734876871109, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1303492784500122, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0736093521118164, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07022812217473984, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08237110078334808, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07607392966747284, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07434223592281342, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0659756138920784, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06312787532806396, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04196562618017197, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.036423832178115845, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03531975299119949, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03505205363035202, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020996086299419403, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018261928111314774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018015317618846893, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01672815904021263, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01656295731663704, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011110838502645493, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011189653538167477, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010749615728855133, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0075754281133413315, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23993341624736786, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22610335052013397, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2214556336402893, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20167720317840576, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11329177021980286, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10840802639722824, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.126359760761261, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11639789491891861, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11433645337820053, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10173022747039795, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09739819914102554, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06432805955410004, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05568569153547287, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05428902804851532, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05396332964301109, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03225255385041237, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027857068926095963, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027519091963768005, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02549459971487522, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025282051414251328, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017095785588026047, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016692308709025383, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01664125546813011, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0109248710796237, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21370087563991547, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18626528978347778, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17378941178321838, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1542806625366211, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09714160859584808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08546019345521927, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11948534846305847, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10874893516302109, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10078451782464981, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08136564493179321, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07762173563241959, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06065564975142479, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05219113826751709, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04691349342465401, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04558900371193886, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030783291906118393, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02482406236231327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02416583150625229, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021777410060167313, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.020921440795063972, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016965605318546295, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01660851575434208, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015264048241078854, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011731447651982307, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12428521364927292, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11456146091222763, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10779353231191635, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09776613861322403, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05805074796080589, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.05273526534438133, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0726279467344284, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06611621379852295, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.059079140424728394, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.05134285241365433, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04982950538396835, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03692599758505821, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03159824013710022, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02795916423201561, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.027036558836698532, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018477505072951317, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014554460532963276, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01392680499702692, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013209265656769276, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012633189558982849, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009672905318439007, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009527906775474548, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008365031331777573, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00630879495292902, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10225614160299301, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09358564019203186, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0854763612151146, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07757434248924255, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04732932522892952, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.041477020829916, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06286695599555969, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.057531725615262985, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.048353563994169235, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.041854023933410645, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04089764878153801, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.031821731477975845, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027441490441560745, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.022822173312306404, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021602122113108635, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015872053802013397, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011874202638864517, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011073912493884563, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010773484595119953, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009996543638408184, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008226616308093071, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008065744303166866, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006640059873461723, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005117231979966164, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22452446818351746, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20580992102622986, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19702266156673431, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17855221033096313, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10464383661746979, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0964718610048294, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12493404000997543, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11315649747848511, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10655497759580612, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09182461351156235, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0883156955242157, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06354924291372299, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05399414524435997, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.050124071538448334, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04918317869305611, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.031733736395835876, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02557053416967392, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02478107623755932, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022921528667211533, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022303098812699318, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01629498228430748, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015567786060273647, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014663218520581722, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00958140566945076, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1946234554052353, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17205196619033813, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.16421428322792053, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13792163133621216, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09087307006120682, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08272060751914978, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10520405322313309, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09595197439193726, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0927606001496315, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07347461581230164, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06539558619260788, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.053489360958337784, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04597847908735275, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.043653517961502075, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04310257360339165, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.026816459372639656, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02256564237177372, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02200373075902462, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.019064560532569885, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.018677160143852234, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014230667613446712, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01400151289999485, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013432794250547886, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009359133429825306, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15881721675395966, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14921484887599945, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14577358961105347, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1326330602169037, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07498468458652496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07149504125118256, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0840476006269455, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07751765102148056, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07573096454143524, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06717349588871002, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0642838180065155, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04286646842956543, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03713768720626831, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03599783405661583, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03572709485888481, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021458635106682777, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.0186367928981781, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018381737172603607, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017059510573744774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01689201593399048, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01142582856118679, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011452940292656422, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011050679720938206, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007787961047142744, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24207660555839539, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22805199027061462, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22323158383369446, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20317423343658447, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11437693983316422, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10938337445259094, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1270805299282074, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11753430962562561, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1154470443725586, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10258802771568298, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.098055399954319, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06468040496110916, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.056174665689468384, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.054751165211200714, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.054422296583652496, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03228747472167015, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.0279863141477108, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027634531259536743, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025562727823853493, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02535160817205906, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01678897999227047, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016615640372037888, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01630973443388939, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010642058216035366, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21275296807289124, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18525075912475586, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17207379639148712, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15318316221237183, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09647737443447113, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08446168154478073, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11929620802402496, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10915561765432358, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10024987161159515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08109307289123535, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0775778666138649, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0607227198779583, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05221327766776085, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04645746946334839, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04501822963356972, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030510952696204185, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024354763329029083, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.023632938042283058, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0213442612439394, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.020406492054462433, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01644313335418701, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016114475205540657, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014591074548661709, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010988316498696804, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11709870398044586, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10740849375724792, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10019079595804214, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09090255200862885, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.054607655853033066, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04903569445014, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06933216750621796, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06323465704917908, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.055685725063085556, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04823136329650879, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.046796586364507675, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03529854118824005, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03029000200331211, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026342472061514854, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025331152603030205, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017656391486525536, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013773412443697453, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01309498492628336, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01250639371573925, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011869239620864391, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00921436958014965, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009186542592942715, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007813512347638607, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006115104537457228, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09778334200382233, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08896264433860779, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07986146956682205, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07259614020586014, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04512137174606323, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03874727338552475, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06209466978907585, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05644040182232857, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.046187542378902435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03978570178151131, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.039070501923561096, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03144130855798721, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026876108720898628, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02179686725139618, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02042548730969429, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015710530802607536, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011409209109842777, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010544989258050919, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010356397368013859, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009497616440057755, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008213496766984463, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007959626615047455, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00646748673170805, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005131606012582779, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.21377474069595337, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1928710639476776, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18208956718444824, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16458825767040253, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09871877729892731, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.089237742125988, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1222689151763916, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11004768311977386, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10117054730653763, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08577005565166473, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08284078538417816, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06247101351618767, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05245111510157585, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04736814275383949, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04607505351305008, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.031240636482834816, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024213217198848724, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.023228375241160393, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021540142595767975, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020702535286545753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01604056917130947, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015096284449100494, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013858547434210777, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009251080453395844, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.18223164975643158, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16423299908638, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15879058837890625, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1383410394191742, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08592256158590317, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07966621220111847, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09648977220058441, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08869387954473495, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08704940229654312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0719950869679451, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06599773466587067, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04932302609086037, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04252691566944122, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.041267335414886475, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04096100106835365, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02465522661805153, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.0212550088763237, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02088700234889984, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.018449433147907257, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.018250083550810814, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.012973513454198837, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01292164996266365, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01255696639418602, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.008633015677332878, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16229841113090515, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1525159478187561, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14893953502178192, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13552312552928925, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07674899697303772, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07312341779470444, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08592241257429123, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0793595165014267, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0775124654173851, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06867969036102295, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06564878672361374, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04379189759492874, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.038002803921699524, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.036829784512519836, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03654700145125389, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021888582035899162, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01902693137526512, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018760642036795616, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017400233075022697, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01722138747572899, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011539516970515251, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011638417840003967, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011148868128657341, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007843553088605404, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2416982799768448, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22759072482585907, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22282834351062775, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20264935493469238, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11432930827140808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1092931479215622, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12734092772006989, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11752097308635712, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11542463302612305, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10247430950403214, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09792449325323105, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06490492075681686, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0562262199819088, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05479643493890762, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0544535331428051, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0324406735599041, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028108900412917137, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027753902599215508, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02568773925304413, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02546742372214794, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017081715166568756, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016838572919368744, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016596179455518723, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010997258126735687, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21677826344966888, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1897122859954834, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17655469477176666, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15718615055084229, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09858906269073486, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08665405213832855, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12195885181427002, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11148462444543839, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10230296850204468, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08326323330402374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07965262979269028, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06216847151517868, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.053526148200035095, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04765874892473221, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04617922008037567, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.031599000096321106, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025308921933174133, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.024575192481279373, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022369107231497765, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021420728415250778, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017532233148813248, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017123430967330933, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015684453770518303, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012161632999777794, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12260667234659195, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11260517686605453, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10622543841600418, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09652125090360641, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05724937841296196, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.05209028720855713, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0709042027592659, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0646056979894638, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05833560600876808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.050548650324344635, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04892529174685478, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03613520413637161, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030912183225154877, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02759784646332264, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.026772456243634224, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018112637102603912, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01441203709691763, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013834835961461067, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013079346157610416, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012554111890494823, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009514656849205494, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009444253519177437, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00831617508083582, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006347788032144308, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10058669745922089, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09240005910396576, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08641856163740158, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07842642068862915, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0468626506626606, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.042198546230793, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05940850451588631, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05408839136362076, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04774169623851776, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04144268110394478, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04015572369098663, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03006664849817753, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.025943730026483536, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02258545346558094, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021721141412854195, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015073305927217007, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011753572151064873, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011180168017745018, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010672522708773613, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010131077840924263, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007901912555098534, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007776136044412851, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006744240876287222, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005097203887999058, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.207983136177063, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1907626986503601, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18253089487552643, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16540215909481049, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09716223180294037, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08953646570444107, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11497224122285843, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10498138517141342, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09888025373220444, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08518385887145996, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08170962333679199, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05841532349586487, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.050134751945734024, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.046507805585861206, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.045603081583976746, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029172537848353386, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02365471050143242, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.022936955094337463, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021203240379691124, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02061123587191105, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014954675920307636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014350401237607002, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013589323498308659, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00869506411254406, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1907663643360138, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16851738095283508, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.161000058054924, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1338718980550766, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08985268324613571, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08163906633853912, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10252106189727783, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09403443336486816, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09134051203727722, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07056679576635361, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06539962440729141, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.052673764526844025, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.045281413942575455, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04341559112071991, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04298222437500954, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02649969793856144, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.022919217124581337, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02242017537355423, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01917426846921444, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.018877949565649033, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014401259832084179, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014779211021959782, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013792864046990871, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010718638077378273, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15709124505519867, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1474151611328125, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14386281371116638, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1307261735200882, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07431326806545258, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07068216055631638, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08345218002796173, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07704053074121475, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07507951557636261, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06639877706766129, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06354393064975739, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04263395443558693, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03694969788193703, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.035707615315914154, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.035411011427640915, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021333031356334686, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018521135672926903, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018250005319714546, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016937678679823875, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01675436832010746, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011334819719195366, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011442327871918678, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010923486202955246, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007832853123545647, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2323029339313507, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2185840755701065, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.213898167014122, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19441665709018707, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10994720458984375, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10498584806919098, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12254549562931061, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11314860731363297, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1109994575381279, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09844498336315155, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0941651314496994, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06250668317079544, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05413800850510597, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05268464237451553, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05234494432806969, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031243110075592995, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027016492560505867, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026665745303034782, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02466537430882454, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02444610185921192, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01642644591629505, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01618664525449276, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015939267352223396, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010545422323048115, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2163441926240921, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1887620985507965, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.174811452627182, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15555453300476074, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09834348410367966, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08586534857749939, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12389041483402252, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11219332367181778, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1021636500954628, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08279114961624146, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07928141951560974, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06287442147731781, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05386565253138542, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.0475948229432106, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.046017277985811234, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03177323564887047, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02534763514995575, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02456841804087162, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022370953112840652, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021359950304031372, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01742238737642765, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017301278188824654, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015373204834759235, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012346259318292141, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11191459745168686, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10272683203220367, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09473800659179688, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08606894314289093, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.052226725965738297, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.046341583132743835, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06863454729318619, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06215484440326691, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.053256724029779434, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04616375267505646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0451594702899456, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03496086597442627, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029746390879154205, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025252478197216988, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.024094020947813988, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01753101497888565, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013318386860191822, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012560924515128136, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012130383402109146, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011404191143810749, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009203941561281681, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009138439781963825, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007575490511953831, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006189921870827675, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09154650568962097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08316218107938766, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07284034043550491, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06638647615909576, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.042113352566957474, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03514096513390541, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06083875149488449, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05516170710325241, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04318922385573387, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03724706545472145, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.036925606429576874, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.030801663175225258, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026230407878756523, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.0204040315002203, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01876150257885456, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015432514250278473, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010755610652267933, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009749960154294968, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009817617014050484, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008792232722043991, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008036931976675987, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00779018085449934, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0060114688239991665, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005021767225116491, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.21104532480239868, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18989726901054382, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17827653884887695, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1610279381275177, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09756447374820709, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08741994947195053, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1230107918381691, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11027587950229645, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10006793588399887, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08463511615991592, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08181717991828918, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06297335773706436, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05275820568203926, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.046868909150362015, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.045403074473142624, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03152460232377052, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024134740233421326, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02305113896727562, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02151048742234707, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020543575286865234, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016220083460211754, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015433127991855145, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0137755386531353, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009749419055879116, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.21035903692245483, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18394596874713898, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17509157955646515, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14912216365337372, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09797924011945724, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0881485566496849, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1129678338766098, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10335550457239151, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09999488294124603, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0782579630613327, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07233656942844391, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.057905834168195724, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04967693239450455, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.047218043357133865, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0466272346675396, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02900237962603569, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.024709558114409447, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.0240471251308918, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02081194706261158, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02040577307343483, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015501110814511776, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015736445784568787, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014716172590851784, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011055145412683487, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.14241492748260498, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13342414796352386, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12990672886371613, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11801952123641968, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06732146441936493, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06390997767448425, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07603821158409119, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07020244747400284, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06811569631099701, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06010635197162628, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.057509645819664, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.038828156888484955, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03366437554359436, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.032367583364248276, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03206166997551918, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.019422631710767746, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.016790742054581642, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.016525160521268845, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.015337983146309853, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.015145262703299522, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01028390321880579, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.010415920056402683, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.009856210090219975, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007119189016520977, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2155020534992218, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2023613452911377, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.19765803217887878, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.17960460484027863, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10199615359306335, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09721451997756958, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11416070908308029, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10537677258253098, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10305541753768921, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09116493165493011, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08715930581092834, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05827564001083374, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05047721415758133, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.048953671008348465, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04859066754579544, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.029169248417019844, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02526891604065895, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02492251805961132, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023077867925167084, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.022847382351756096, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015435482375323772, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015414932742714882, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.014916393905878067, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010347031988203526, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.1990438997745514, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.17225688695907593, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.15835979580879211, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.14018899202346802, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09039676189422607, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.07813237607479095, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11470262706279755, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10422307252883911, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09413664788007736, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07541709393262863, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07223517447710037, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05869029089808464, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0502670593559742, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.043995894491672516, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.0424066036939621, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02996538020670414, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02384813502430916, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.023057885468006134, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02105649933218956, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.020039327442646027, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016937347128987312, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016808904707431793, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01495201513171196, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01245968509465456, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10996367037296295, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10087835788726807, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09339885413646698, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08496152609586716, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05116700381040573, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04550978168845177, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06766487658023834, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06044504791498184, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05221545696258545, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.045316729694604874, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.044311150908470154, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03450477495789528, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028926849365234375, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024699930101633072, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02362062595784664, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0172872394323349, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012972808443009853, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012260708026587963, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011821280233561993, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011137723922729492, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009086479432880878, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008808569982647896, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00747993728145957, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005897423718124628, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09040369093418121, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08337363600730896, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07577201724052429, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06890788674354553, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04187256097793579, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03662852942943573, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05723947659134865, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05157450959086418, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.042720843106508255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.037327852100133896, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03657621890306473, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02906838245689869, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02460954152047634, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.020210841670632362, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019024336710572243, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014544774778187275, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010521244257688522, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009763752110302448, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009615739807486534, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008855678141117096, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007556620053946972, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007221406325697899, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005944123491644859, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004555415827780962, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22806201875209808, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21004195511341095, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2011984884738922, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1825626939535141, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10680925846099854, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09873194247484207, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1276715099811554, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11585790663957596, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10870002210140228, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09431357681751251, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09080862253904343, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06528764963150024, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05540585145354271, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.051170021295547485, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05013846606016159, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.032685816287994385, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02620849385857582, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02540154568850994, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023650502786040306, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022977441549301147, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016839561983942986, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01615636982023716, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015070829540491104, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010124243795871735, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.18455193936824799, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.160492405295372, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15196660161018372, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13125216960906982, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08577930927276611, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07678677141666412, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10116584599018097, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09114206582307816, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08791882544755936, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06881707906723022, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06397977471351624, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05186225101351738, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.043782398104667664, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04133623093366623, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04075877368450165, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.025987939909100533, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.021648883819580078, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02103404514491558, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.018303772434592247, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.017905212938785553, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.013810842297971249, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.013847924768924713, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.012957999482750893, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009703464806079865, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.13764651119709015, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.12874530255794525, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12516237795352936, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.113729327917099, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06503905355930328, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.061613988131284714, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0737440213561058, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06797020137310028, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0658096969127655, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05799496918916702, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05549808591604233, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03764248266816139, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03263380378484726, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03131338581442833, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03099716454744339, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01885724812746048, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.016339868307113647, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.016064895316958427, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.014939802698791027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.014740505255758762, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01005752943456173, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.010290171951055527, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.009616455063223839, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007189632393419743, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1914873868227005, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17951542139053345, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17510086297988892, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.15919950604438782, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.09121093153953552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08677752315998077, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1024433821439743, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0945407897233963, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09223844110965729, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08158327639102936, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07817220687866211, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.052853114902973175, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.046156805008649826, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04468619078397751, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04433346539735794, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02664042077958584, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02458409033715725, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.024289000779390335, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.022815654054284096, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.022602761164307594, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015032866969704628, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01704772561788559, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.014572656713426113, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013669846579432487, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.20171616971492767, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.17608533799648285, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.16384735703468323, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.14376786351203918, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09188225865364075, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08080222457647324, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11396398395299911, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10358193516731262, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.0952036902308464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07650134712457657, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07255155593156815, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05777740105986595, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04974634572863579, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04447030648589134, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04314129054546356, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029062941670417786, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02372341975569725, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.023069264367222786, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02079792134463787, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.019952790811657906, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01573866605758667, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016148170456290245, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014007159508764744, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011658862233161926, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10677747428417206, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09750126302242279, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0884571224451065, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08028256148099899, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04964771121740341, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.043211430311203, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06838392466306686, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060927268117666245, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05075669661164284, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04375099763274193, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04317693039774895, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03498354181647301, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029219187796115875, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024080771952867508, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02271992340683937, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017577441409230232, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012792399153113365, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011943819932639599, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011651778593659401, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010817078873515129, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009299001656472683, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009031987749040127, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007375030312687159, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006184103433042765, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09054087102413177, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08217886835336685, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07134968787431717, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06482450664043427, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04158053919672966, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.034295182675123215, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06152772158384323, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05533194541931152, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04272611439228058, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03683476150035858, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0366327241063118, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.031250759959220886, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026430515572428703, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02016349509358406, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.018415365368127823, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015675988048315048, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01067056879401207, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009574692696332932, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009751630015671253, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008632302284240723, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008144049905240536, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007846242748200893, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005889100022614002, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005032165441662073, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20240618288516998, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18105705082416534, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16818299889564514, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15214373171329498, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09321533143520355, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08239568769931793, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12043239921331406, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10756492614746094, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0957770049571991, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08062630891799927, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07851268351078033, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06177070736885071, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05144235119223595, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04485626518726349, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04314914718270302, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03105849213898182, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.023247119039297104, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0220453143119812, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020714817568659782, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.019633037969470024, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01607612334191799, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015251430682837963, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013216715306043625, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00980041828006506, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.15870976448059082, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.13963809609413147, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.13374020159244537, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1144876554608345, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.07420423626899719, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.06749904155731201, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.08412326872348785, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.07677824795246124, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.07529109716415405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.05924837663769722, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.05530403554439545, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.043207745999097824, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03703124448657036, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0358913354575634, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.03560006245970726, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02169959805905819, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.019041316583752632, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.018683578819036484, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01615792140364647, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01597556285560131, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.011817864142358303, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.012353729456663132, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.011448947712779045, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009151049889624119, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.13497121632099152, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.12643539905548096, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12305198609828949, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11187530308961868, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06423468142747879, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.060926858335733414, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07254280894994736, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06694940477609634, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06493225693702698, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.057374004274606705, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.054978616535663605, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.037459596991539, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03267162665724754, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.0314529687166214, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.031164946034550667, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.018892841413617134, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01727338135242462, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.017029836773872375, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016007181257009506, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01583843119442463, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010626872070133686, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011949819512665272, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010241611860692501, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009536152705550194, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15205714106559753, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14256137609481812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1390395164489746, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12635788321495056, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07235512882471085, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06881648302078247, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08137040585279465, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07501727342605591, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07309068739414215, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06464532762765884, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06196622550487518, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.042011138051748276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03650883212685585, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03530991077423096, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03503554314374924, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021162347868084908, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019214650616049767, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018967609852552414, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017776083201169968, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.0176087636500597, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01186648290604353, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013079304248094559, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011484161019325256, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01028432510793209, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.19484135508537292, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1717187911272049, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.16168709099292755, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1402716189622879, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.08960199356079102, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08029592782258987, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.10956347733736038, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.09820763766765594, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09205995500087738, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07453864812850952, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07029454410076141, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05605342984199524, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0478891097009182, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04398120194673538, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.043015167117118835, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02880827710032463, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024381747469305992, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02391801029443741, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02166794054210186, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021078558638691902, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016775965690612793, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017420681193470955, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015574749559164047, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013822107575833797, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10458296537399292, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09524105489253998, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08644917607307434, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07847301661968231, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.048503246158361435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04214988648891449, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0658176839351654, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05946548655629158, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04968060180544853, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.042694151401519775, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04193245247006416, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03357912600040436, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028466887772083282, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.023503487929701805, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02218938246369362, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016834603622555733, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012453758157789707, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011622212827205658, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011323838494718075, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01051135640591383, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00888931192457676, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008752118796110153, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007120550610125065, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00594410439953208, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09085091203451157, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08235952258110046, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07190188765525818, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06528270989656448, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.041728924959897995, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03467946872115135, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06180977076292038, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0549575574696064, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04291171208024025, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03684448078274727, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03663283586502075, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.031319789588451385, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026191329583525658, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.020245540887117386, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.018597474321722984, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015697654336690903, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010721372440457344, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009701450355350971, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009782478213310242, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008738739416003227, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008247851394116879, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007833976298570633, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006080692633986473, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00510729756206274, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.21222132444381714, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18727339804172516, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17222222685813904, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15596091747283936, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09746561199426651, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08476496487855911, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1270066797733307, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11377489566802979, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10070038586854935, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08327113091945648, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08130484074354172, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06525340676307678, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.054415296763181686, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04687236249446869, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.044944822788238525, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.032724928110837936, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024191470816731453, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02280227281153202, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02136571705341339, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0200917050242424, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01684691570699215, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015884792432188988, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013710063882172108, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009988786652684212, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.08803873509168625, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08001726120710373, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0775584727525711, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.06615947186946869, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04017001390457153, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.037509363144636154, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0470811165869236, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0416567288339138, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.040700145065784454, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03357967361807823, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03131780028343201, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.023786811158061028, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.021187428385019302, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02052595466375351, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.020364582538604736, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012523681856691837, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012496495619416237, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012348431162536144, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011475058272480965, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011380563490092754, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007738804444670677, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009834040887653828, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00754585163667798, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.008756476454436779, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.13321028649806976, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.12513098120689392, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12212406098842621, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11080847680568695, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06303919851779938, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.05998546630144119, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07083777338266373, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06528112292289734, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06363581120967865, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05625687167048454, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05381079763174057, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.036130260676145554, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03128693252801895, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.030256811529397964, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.030018912628293037, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.018059922382235527, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01563831977546215, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01541047915816307, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.014269547536969185, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.014117339625954628, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.009499584324657917, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.009584090672433376, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.00915270671248436, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.006458044983446598, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.10889960825443268, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.10209670662879944, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.09961158782243729, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.09033698588609695, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.051509011536836624, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.04899415001273155, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.05794496461749077, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.05334195867180824, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.05203023925423622, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.04592623934149742, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.04395195096731186, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.029683416709303856, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.025670936331152916, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.02484171837568283, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.02464781329035759, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01491051260381937, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01308827381581068, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.012906537391245365, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.011997402645647526, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.011874156072735786, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.008108270354568958, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.008369005285203457, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.007834783755242825, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.006060956511646509, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.15845297276973724, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1420413702726364, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1343546360731125, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.11635230481624603, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.07357784360647202, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.06673876196146011, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.09175095707178116, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.08087871223688126, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.0752720758318901, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.062161851674318314, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.05888090655207634, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.04696636274456978, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.039704617112874985, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.036398764699697495, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.035552628338336945, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.024246202781796455, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.020606540143489838, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02020351216197014, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.01861121505498886, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.018123319372534752, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.014196534641087055, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015164690092206001, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.013094808906316757, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0123955262824893, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] } ], "last_module_idx": 66, "base_perplexity": 7.091545310078691 }