{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.011225979775190353, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.009685725905001163, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.004808689933270216, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0050970218144357204, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.00509682297706604, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.002029719762504101, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.010811271145939827, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.009605014696717262, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0052778287790715694, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.004665352404117584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.004876234102994204, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0050703538581728935, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.004663849715143442, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0027474514208734035, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0020943754352629185, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0026663667522370815, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.001872713677585125, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.0015801009722054005, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0018171382835134864, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0015292090829461813, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.001745966961607337, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0018170429393649101, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0013758622808381915, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0015083309262990952, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.011484600603580475, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.009947041980922222, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.004872004967182875, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.005126976873725653, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.005126687698066235, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0019673348870128393, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.011395968496799469, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.009840155951678753, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.005302621982991695, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.004667161498218775, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.004864155314862728, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.005098411813378334, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.004664404783397913, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.00270249811001122, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.0019826956558972597, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0026581520214676857, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0017268265364691615, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0013817998114973307, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.001660941052250564, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0013176191132515669, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0016685070004314184, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0016608672449365258, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0012580188922584057, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0012914252001792192, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11621863394975662, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07074663788080215, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.043585389852523804, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.05017346888780594, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0501236654818058, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.02408854104578495, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0790335014462471, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06495632231235504, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05483800545334816, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.031639713793992996, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03796061873435974, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04292261600494385, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.03130985051393509, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02439999207854271, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02244914509356022, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.022038768976926804, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012848432175815105, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011051850393414497, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00937480479478836, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00785314105451107, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.011334068141877651, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0093171875923872, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007275479845702648, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0060755726881325245, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10300043225288391, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07436399906873703, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.06039179861545563, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.05025587975978851, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04430491477251053, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.03377756103873253, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06280594319105148, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.055215537548065186, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04770246148109436, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0312952920794487, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03039293922483921, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03176712989807129, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.026805317029356956, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.022125285118818283, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.020878903567790985, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01631336659193039, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01312603522092104, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012242043390870094, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011133105494081974, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010387579910457134, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009514513425529003, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.010542059317231178, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00814701709896326, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.008752807974815369, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1124524176120758, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.10502063482999802, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1026504635810852, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.0939098671078682, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.051017411053180695, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.04886763170361519, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.05687985196709633, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.05238248035311699, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0514809675514698, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.04631366208195686, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.044728439301252365, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.029008256271481514, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.025199569761753082, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.02461683191359043, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.02448686957359314, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.014636106789112091, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.013328004628419876, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.013200712390244007, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.012474294751882553, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.012404195964336395, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.008267964236438274, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.008951346389949322, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.008105874061584473, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00700489804148674, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.13014456629753113, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.12303899228572845, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12074347585439682, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.110648512840271, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.059401802718639374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.05733245611190796, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.06568698585033417, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06063142791390419, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.059828437864780426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05431720241904259, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05225823074579239, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03317800164222717, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.028763825073838234, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.028247229754924774, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0281224325299263, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.016554763540625572, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.014479616656899452, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.014350468292832375, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01348477229475975, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.013408232480287552, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.008689208887517452, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.008653080090880394, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.008515514433383942, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00570133700966835, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.07749347388744354, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.06711115688085556, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.06175007298588753, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.054625723510980606, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.03474527597427368, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.030340461060404778, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.04743664711713791, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.039887186139822006, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.036002013832330704, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.028996581211686134, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.027822455391287804, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.02267547696828842, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.019111238420009613, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.017118580639362335, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.016631511971354485, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.011699416674673557, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.009722874499857426, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.009490455500781536, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.008734035305678844, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.008448611944913864, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.00707415072247386, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.007229547016322613, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.006512652616947889, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.00588975939899683, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.019456880167126656, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.013247846625745296, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.007565861567854881, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.008242907002568245, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.007990933954715729, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0036068051122128963, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.01378761138767004, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.012481482699513435, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.009113335981965065, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.006095931865274906, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.006538986228406429, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.006978366058319807, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.005961047951132059, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.004002066794782877, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.003392099868506193, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0035172877833247185, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.002303779125213623, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.001913825049996376, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0019877906888723373, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0016096743056550622, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0019181778188794851, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0019676899537444115, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0012975017307326198, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.001441892352886498, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.017529405653476715, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.012182642705738544, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.006664935499429703, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0072927107103168964, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.007102702744305134, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.002981343073770404, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.013021694496273994, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.01163486298173666, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.008133072406053543, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.005624394863843918, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.006045772694051266, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.00652903551235795, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.005529114976525307, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.0035428053233772516, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.002892947057262063, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.003280429169535637, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0020190789364278316, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.001627174555324018, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.001771030481904745, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0013663603458553553, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0017456627683714032, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0017565195448696613, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0010802802862599492, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.001231514965184033, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.13468867540359497, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08983325958251953, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0651996061205864, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06355724483728409, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.05796988308429718, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03466644510626793, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0826634019613266, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.07479635626077652, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.06409203261137009, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.039239902049303055, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04031127318739891, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04218120500445366, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.03579632565379143, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.028124265372753143, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02605718933045864, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.021066375076770782, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.014824985526502132, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012991692870855331, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011256502941250801, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009652788750827312, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.010918660089373589, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.010655210353434086, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.008220221847295761, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006934417877346277, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.14774441719055176, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11571401357650757, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10136102885007858, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0870557352900505, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.06768462806940079, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.053924478590488434, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.08755066245794296, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.07753221690654755, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.07018129527568817, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0490117184817791, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04746982082724571, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04508882015943527, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03785456717014313, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.03347715362906456, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.032388459891080856, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.022858357056975365, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.018831145018339157, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.017928730696439743, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.015595193020999432, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0148775614798069, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.012905551120638847, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01392591092735529, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.011533435434103012, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011131570674479008, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1546073704957962, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14635157585144043, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14391809701919556, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13115330040454865, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07166092097759247, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06920517235994339, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07951055467128754, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07302279770374298, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07214432209730148, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06526600569486618, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06282614171504974, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04076166823506355, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03550262004137039, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.034913286566734314, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03479250520467758, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020692437887191772, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019231455400586128, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01909758523106575, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018074603751301765, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01801352947950363, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.0118954386562109, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013267270289361477, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011739391833543777, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010737852193415165, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1902068555355072, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.18065769970417023, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1776718944311142, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.16233396530151367, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08855896443128586, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08565688878297806, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09778641909360886, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09005270898342133, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08912179619073868, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08074943721294403, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0777309462428093, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.049903132021427155, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.043369442224502563, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04273465275764465, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04258323833346367, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.025013567879796028, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.0228151623159647, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.022653840482234955, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02134844847023487, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02126285433769226, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013707488775253296, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014903722330927849, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.013505061157047749, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011345546692609787, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.01606542430818081, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.01566004939377308, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.006394848693162203, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.005917958449572325, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.005023597273975611, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0032477183267474174, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.01576821319758892, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.015043594874441624, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.00509560015052557, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.004725176375359297, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.004524439573287964, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.004382310900837183, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.004395816475152969, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.002478028181940317, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.002498915884643793, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.002280251821503043, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.0022228718735277653, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.0009978917660191655, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0021895053796470165, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.0009255616459995508, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0021764561533927917, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.0021534222178161144, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.0007166765863075852, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0007793476688675582, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.05205332115292549, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.04113182798027992, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.03481655940413475, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.031464509665966034, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.023099953308701515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.01759803108870983, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.03173292055726051, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.028830306604504585, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.024608733132481575, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.01791747286915779, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.01769133284687996, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.016107257455587387, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.013766445219516754, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.01119651086628437, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.01052054576575756, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.00805447157472372, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.005933914799243212, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.005416181404143572, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.004938222002238035, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.004463693592697382, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.004193143919110298, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004204153548926115, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0032896047923713923, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0028221500106155872, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.05447425693273544, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.04117640480399132, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.03284000605344772, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.030346181243658066, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.023638052865862846, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.016619499772787094, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.034063730388879776, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03094279021024704, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.025543618947267532, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.01795343868434429, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.018042881041765213, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01720416732132435, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.014735475182533264, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.011486910283565521, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.010596145875751972, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.008609025739133358, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.006112226750701666, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.005448328331112862, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0050283693708479404, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.004407733213156462, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004490647930651903, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004468216095119715, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.003389818826690316, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.002980931894853711, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.17413130402565002, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.14550083875656128, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.13381974399089813, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.11846110224723816, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.07995497435331345, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0676523894071579, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09798775613307953, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0888681411743164, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08304678648710251, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0631033405661583, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.060407817363739014, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04984351992607117, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.042495205998420715, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.038430068641901016, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.03744768723845482, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.024915508925914764, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.019782157614827156, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.018787842243909836, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.016386505216360092, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01566578447818756, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012944634072482586, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012514198198914528, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.011452881619334221, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007954512722790241, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.161712184548378, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.14270704984664917, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1330554485321045, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.10938835889101028, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.07597436755895615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.06725989282131195, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09359453618526459, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0851096585392952, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0777238979935646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.061067406088113785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.05556383728981018, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04891815781593323, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.041788019239902496, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.03763994574546814, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0365893617272377, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.025009790435433388, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02114717848598957, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.020421000197529793, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01850937306880951, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01789046823978424, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014315526932477951, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01552736759185791, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013036205433309078, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0125060910359025, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.21272538602352142, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20057614147663116, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.19674232602119446, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.17855867743492126, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10004542768001556, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09611137956380844, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11079726368188858, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10204628854990005, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10082851350307465, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08989549428224564, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0858316570520401, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05659600347280502, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04901865869760513, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04813336208462715, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04792207479476929, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02834773249924183, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.025248538702726364, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.025021567940711975, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023258965462446213, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02313644252717495, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015262112021446228, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015898847952485085, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.014980955980718136, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011474469676613808, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2383567839860916, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2251606583595276, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22100551426410675, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.200583815574646, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11197074502706528, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10766447335481644, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12369316816329956, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1141180768609047, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11280767619609833, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10071457922458649, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0960698276758194, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06301434338092804, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05453561246395111, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.053584326058626175, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0533660426735878, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0314861424267292, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02753269486129284, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02727190963923931, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025263264775276184, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025126570835709572, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01658833958208561, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01650315895676613, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016282537952065468, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010963198728859425, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.18448980152606964, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.16407343745231628, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1560896337032318, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.13890156149864197, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.08352932333946228, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.07591017335653305, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.098420150578022, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.08990012854337692, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.08581990003585815, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07134923338890076, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.06777431815862656, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.04992687702178955, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04299364238977432, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.040170200169086456, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.03947567939758301, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.025076672434806824, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02119818702340126, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.020870577543973923, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.018877021968364716, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.01843925192952156, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.013593469746410847, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.013867033645510674, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.012668176554143429, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.009962116368114948, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.04247932508587837, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.03567392751574516, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.031797364354133606, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.028216255828738213, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.01931057870388031, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.01599835231900215, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.02536482736468315, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.023061053827404976, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.020141657441854477, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.015555636025965214, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.015057062730193138, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0129014290869236, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.011022648774087429, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.009354689158499241, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.008921196684241295, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.006468139588832855, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.0049543739296495914, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.004636656958609819, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.004229739308357239, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.003942775074392557, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.003426641458645463, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.003426167881116271, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.002840673550963402, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0023568246979266405, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.04340898245573044, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0354565791785717, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.030393395572900772, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0270786564797163, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.019382769241929054, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.015208295546472073, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.026830261573195457, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.024327663704752922, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.02041776105761528, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.015389776788651943, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.015115696005523205, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.013621204532682896, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0115964962169528, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.009344705380499363, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.008739866316318512, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.006814038846641779, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.004898261744529009, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0044709183275699615, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.004119866527616978, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0037064701318740845, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0035412113647907972, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0034274859353899956, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0027307707350701094, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0022031962871551514, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.17972958087921143, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.15323807299137115, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1424674689769745, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.12550607323646545, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08260562270879745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07172985374927521, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0991743952035904, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09113623946905136, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08540598303079605, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.06649088859558105, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06312823295593262, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.050442591309547424, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04354037344455719, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.03964325785636902, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.03870070353150368, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02517683431506157, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02027781680226326, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.019402271136641502, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.016985667869448662, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.016319824382662773, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01293979212641716, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01258805301040411, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.011563071981072426, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007805773988366127, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.17582905292510986, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1547352373600006, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1469496786594391, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.12484617531299591, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08215204626321793, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07434201240539551, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09632080048322678, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08747892826795578, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08374869078397751, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06637221574783325, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06111482158303261, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04968172684311867, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.042579181492328644, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04015921801328659, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.03959375619888306, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02509189583361149, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.021953972056508064, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02143106609582901, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01907191425561905, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.018707429990172386, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.013998120091855526, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01521282084286213, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013228679075837135, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01193089596927166, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.216622993350029, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20428302884101868, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20047205686569214, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1819128841161728, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10216528177261353, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09812510758638382, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1130395457148552, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10423605144023895, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10298515111207962, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09176081418991089, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.08757344633340836, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0577213317155838, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.049974627792835236, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04905922710895538, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.048844508826732635, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.028882168233394623, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.025503437966108322, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.025254234671592712, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02342900075018406, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02329966239631176, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015423670411109924, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01571705751121044, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015126707032322884, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010959086939692497, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24790097773075104, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23391936719417572, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22955842316150665, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2086087167263031, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.1169353500008583, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11233717948198318, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12923663854599, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11919927597045898, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11786085367202759, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1050630435347557, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.1003982424736023, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06592157483100891, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05702248960733414, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05601657181978226, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.055784594267606735, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03296525031328201, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028784336522221565, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028499199077486992, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02638726681470871, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02623438835144043, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017425891011953354, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017240937799215317, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017100902274250984, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01142944861203432, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2105351686477661, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18798692524433136, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1796366423368454, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16047269105911255, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09587205201387405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0875600054860115, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1122228354215622, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10245182365179062, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09845893830060959, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0822247713804245, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07826165854930878, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05700371414422989, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.049014538526535034, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04605033993721008, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04533590003848076, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02863682247698307, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024141548201441765, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02379610948264599, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02149738371372223, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021036367863416672, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.015483438968658447, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015527913346886635, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014513646252453327, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01093379594385624, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.059429287910461426, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.050487201660871506, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.045248448848724365, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.04040319472551346, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.02709692344069481, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.02264808677136898, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.03569115325808525, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.032285284250974655, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.02819870226085186, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.02217845246195793, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.021518327295780182, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.01818643882870674, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.015501677989959717, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.013154448010027409, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.012554226443171501, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.009133047424256802, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.007024434395134449, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.0065884278155863285, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.006089603062719107, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.005694376304745674, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.004867455456405878, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004920308478176594, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00405193492770195, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.003467829432338476, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.05749429762363434, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.04753720760345459, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04155709967017174, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0372186079621315, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.025687705725431442, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.020702961832284927, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.035125140100717545, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.031587883830070496, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.027101648971438408, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.020790232345461845, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.020386379212141037, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01792723871767521, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.015128351747989655, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01245961245149374, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.011753915809094906, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.008999534882605076, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.006594139616936445, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.006096479017287493, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.005644641350954771, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.005173726938664913, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004741288721561432, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004632833879441023, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.003775303717702627, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0031281381379812956, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.1956423819065094, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16935080289840698, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.15912514925003052, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.140791118144989, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0901251882314682, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.07979340106248856, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1068492904305458, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09820197522640228, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09296363592147827, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0738966092467308, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07010366767644882, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.054319195449352264, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04691570624709129, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04326123744249344, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04236561432480812, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027142075821757317, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.022224465385079384, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.021398812532424927, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018899081274867058, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.018275143578648567, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014063077978789806, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013800300657749176, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0127536216750741, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008746268227696419, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.17015038430690765, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.15099701285362244, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.14372630417346954, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.12136300653219223, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.079572893679142, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.07242459058761597, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09446445852518082, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0848444327712059, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08097365498542786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06479085981845856, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.059637635946273804, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04877527058124542, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04134995490312576, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.03897947072982788, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.03840384632349014, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.024717826396226883, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.021358216181397438, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02084035612642765, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01864142157137394, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.018283886834979057, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01392346527427435, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014827035367488861, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013188133016228676, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011656275950372219, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.19116167724132538, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17935815453529358, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17543888092041016, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.15893539786338806, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.09022564440965652, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08613544702529907, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10078020393848419, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09271172434091568, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.09110148251056671, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08053067326545715, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0769113078713417, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.051457006484270096, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04446857422590256, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04336485266685486, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.043097928166389465, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.025793585926294327, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02251393161714077, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.022243596613407135, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02057492919266224, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.020409803837537766, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013822957873344421, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013878840953111649, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01345028541982174, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009594548493623734, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.249443918466568, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23461590707302094, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22985875606536865, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20853030681610107, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11782679706811905, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11274784803390503, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1310238391160965, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12061074376106262, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1188916563987732, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10543203353881836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10075937211513519, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06682287156581879, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.057760268449783325, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.056509509682655334, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05621224641799927, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.033489905297756195, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.029046256095170975, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028721939772367477, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026520688086748123, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02634052187204361, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017742738127708435, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01744142919778824, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017326993867754936, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011553558520972729, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2108248621225357, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18862482905387878, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18018738925457, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16069649159908295, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09630505740642548, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08797769993543625, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11347953230142593, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10312551259994507, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09884762763977051, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08279746770858765, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07862772047519684, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05775870382785797, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04948687553405762, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04641502723097801, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.045659612864255905, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029050881043076515, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02453221008181572, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02418127842247486, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021935997530817986, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021465664729475975, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.015772074460983276, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01605650968849659, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01473761536180973, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011597799137234688, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.0707099437713623, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.061780206859111786, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05675942450761795, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.050556641072034836, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.03246532008051872, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.02824452891945839, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.041338320821523666, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03763479366898537, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.033523958176374435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.027190782129764557, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.026149768382310867, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02097192220389843, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.018009603023529053, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.01571124978363514, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.015128208324313164, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010519883595407009, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.008300147019326687, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.007876378484070301, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007271879352629185, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.00688767246901989, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005578868556767702, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005627432372421026, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.004780585877597332, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0038857385516166687, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.066016785800457, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0567089281976223, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05105500668287277, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.04536789655685425, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.029971446841955185, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.02529563568532467, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.039276499301195145, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03580373153090477, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.031143885105848312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.024744290858507156, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.023993082344532013, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.019922686740756035, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01705678179860115, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.014436082914471626, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.013749835081398487, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009958875365555286, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0074943299405276775, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.007005816325545311, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006460587028414011, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.005999429617077112, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0051412503235042095, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005031955894082785, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0042121619917452335, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003211486153304577, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.21341216564178467, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19018708169460297, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18172939121723175, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1612829864025116, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09939324110746384, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09051097184419632, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11481019854545593, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10566185414791107, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10158407688140869, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0835948958992958, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07903014868497849, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.058342646807432175, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.050465770065784454, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04756516218185425, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.046862538903951645, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029081761837005615, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024158019572496414, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.023479217663407326, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020882498472929, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020399251952767372, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014918181113898754, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014420907013118267, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013877099379897118, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00870658177882433, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2004479020833969, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1815594732761383, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17428769171237946, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.15062196552753448, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0934571921825409, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08644939213991165, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10871249437332153, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09932875633239746, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0952419564127922, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07883031666278839, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0737006813287735, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05594952777028084, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.047921501100063324, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04516969993710518, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.044516317546367645, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02818223275244236, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.023871997371315956, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023317407816648483, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021018652245402336, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.020601050928235054, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015320145525038242, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015568440780043602, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014364172704517841, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01125575415790081, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.18581734597682953, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17428819835186005, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17037014663219452, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1542072296142578, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08769477158784866, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08364803344011307, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09785647690296173, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09022368490695953, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08856120705604553, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07820048928260803, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0745546817779541, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04988419637084007, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04319467395544052, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.04206140339374542, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04178681597113609, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024958964437246323, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02167522720992565, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02139732614159584, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019748196005821228, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.019578713923692703, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01318168081343174, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013133091852068901, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01280191633850336, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008772125467658043, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24937038123607635, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23457171022891998, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.229762464761734, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2083485871553421, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11792424321174622, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11277458071708679, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13100942969322205, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12082965672016144, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11897378414869308, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10547393560409546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10063162446022034, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06678463518619537, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.057796597480773926, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.0564882755279541, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05617692321538925, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03339753672480583, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02893373742699623, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02859564498066902, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02638385444879532, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.026186857372522354, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.0175301656126976, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01722753793001175, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017097147181630135, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011179547756910324, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21281442046165466, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19157420098781586, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18291720747947693, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16356077790260315, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09775304049253464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0893801897764206, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1157505214214325, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10523559153079987, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10020489990711212, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08440925180912018, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08046968281269073, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.059103693813085556, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05057769641280174, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.047144025564193726, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04630293324589729, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02982448972761631, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024937385693192482, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02451920323073864, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022359855473041534, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021830424666404724, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01625050976872444, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016376351937651634, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015074138529598713, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011785031296312809, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.06736095994710922, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.05850277841091156, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05263898894190788, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.04690441116690636, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.030839543789625168, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.026141680777072906, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04099443927407265, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03713516145944595, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.03195633739233017, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.025746546685695648, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.024982541799545288, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02084006369113922, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.01777108572423458, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.014947200194001198, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.014211481437087059, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010444537736475468, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.007935531437397003, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.007424741517752409, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.006952193100005388, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.00646902434527874, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005501102656126022, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005533745512366295, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.004491003230214119, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0038036778569221497, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.06291142851114273, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05363722890615463, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04728347063064575, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.04214861989021301, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.02843482233583927, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.023368144407868385, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03889744356274605, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03522397577762604, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.029656274244189262, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.023452766239643097, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.022926922887563705, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01972186379134655, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01681334897875786, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.013745267875492573, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.012939329259097576, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009843176230788231, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.007219079881906509, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.00666078319773078, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006245059426873922, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0057061645202338696, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005128128919750452, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005031541921198368, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.00404308782890439, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003294234164059162, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.19120492041110992, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17044714093208313, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16165700554847717, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1436505764722824, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08900526911020279, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08057063072919846, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10517553985118866, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09654900431632996, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09103549271821976, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07500453293323517, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07114522904157639, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05343444645404816, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.046126361936330795, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04270853474736214, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04186800867319107, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.026663409546017647, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02177145704627037, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02105731889605522, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018895495682954788, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01833113469183445, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013688582926988602, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013297514989972115, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.012444913387298584, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008201966993510723, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19472181797027588, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17141377925872803, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1618393063545227, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13958077132701874, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09066040068864822, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08121447265148163, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10866667330265045, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09842651337385178, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09308196604251862, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0742885172367096, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06929119676351547, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05600668862462044, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04764493927359581, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.044125087559223175, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04326813668012619, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02813330478966236, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.023635374382138252, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.022908326238393784, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020499933511018753, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.019947217777371407, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015239032916724682, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01595865935087204, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014070725999772549, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011879022233188152, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1783990114927292, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1675712913274765, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1638278216123581, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14857570827007294, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08430255204439163, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.08039572834968567, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09428475052118301, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08692128956317902, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08510344475507736, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07532866299152374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07201413810253143, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04813069850206375, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.041658978909254074, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.0404743030667305, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04020104929804802, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024104945361614227, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02094990573823452, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020680181682109833, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019152211025357246, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018975339829921722, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012808680534362793, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012845128774642944, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012405384331941605, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008738097734749317, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24367792904376984, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22940070927143097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2245486080646515, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20387358963489532, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11532915383577347, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11030378937721252, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12842780351638794, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11842396855354309, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1163441389799118, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10327959805727005, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09873310476541519, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06545084714889526, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05668303743004799, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05528688058257103, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05495484918355942, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032735928893089294, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028394978493452072, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028056936338543892, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025940438732504845, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02573263645172119, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01719418168067932, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01704915426671505, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01672416739165783, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011216475628316402, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21778380870819092, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19688081741333008, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18871192634105682, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1687031388282776, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10040926188230515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09241248667240143, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1183001846075058, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10711745172739029, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10278403759002686, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08694463223218918, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0829339474439621, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06026359274983406, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05143721029162407, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04842030256986618, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.047688078135252, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03032141923904419, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02557692490518093, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025214720517396927, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02297350764274597, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02251240611076355, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016450796276330948, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01667746715247631, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01544105727225542, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012030255980789661, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.07367783784866333, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06577560305595398, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05925668030977249, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.05276225507259369, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.03404126316308975, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.029168719425797462, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04615241661667824, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.041651226580142975, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.03492291644215584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.029117170721292496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.028337111696600914, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.023541787639260292, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.020011236891150475, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0165522713214159, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.015633266419172287, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01180490106344223, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.008875789120793343, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.008282976225018501, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007939198054373264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.007361819036304951, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006247787736356258, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006342321168631315, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.005009360611438751, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00445551797747612, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.0647585391998291, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0571335032582283, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05025314539670944, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.04461803659796715, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.029613936319947243, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.024582702666521072, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.041327591985464096, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03757534176111221, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03052380494773388, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.025144614279270172, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.02453138306736946, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.020981362089514732, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01805468462407589, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.014339398592710495, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.013338548131287098, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.010545329190790653, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.007569686975330114, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.006910018157213926, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00670273182913661, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.006042126566171646, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005486221052706242, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005409790202975273, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004187562968581915, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003528876230120659, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2058941125869751, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18587300181388855, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17797812819480896, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15775318443775177, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.096331387758255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08846497535705566, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11245382577180862, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10265275090932846, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09811501950025558, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08188604563474655, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07752341777086258, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05724404379725456, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0491163395345211, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04619455337524414, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04548608511686325, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.028586961328983307, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02358260564506054, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02294662781059742, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020601686090230942, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020145056769251823, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014743717387318611, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014270693063735962, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013596445322036743, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008925866335630417, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.197991281747818, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17732475697994232, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1682651937007904, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14461958408355713, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09292559325695038, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08415035903453827, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11221474409103394, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10081879794597626, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09479181468486786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07659415900707245, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0726204365491867, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0580052025616169, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04900123551487923, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.045409735292196274, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.044541794806718826, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02926446497440338, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.024622106924653053, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023924164474010468, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02156474068760872, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021009691059589386, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016203250735998154, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016954928636550903, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015069892629981041, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012949194759130478, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17519927024841309, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16464219987392426, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.161061629652977, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14621929824352264, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08302706480026245, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07926326245069504, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09269014745950699, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08534868061542511, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08379685133695602, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07418477535247803, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07087447494268417, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04740827903151512, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.041009921580553055, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.039971973747015, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03972139582037926, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023777948692440987, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020831728354096413, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020579243078827858, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019069025292992592, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018916022032499313, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012710647657513618, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012914580292999744, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012361171655356884, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00902425404638052, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2449420988559723, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23067130148410797, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22602692246437073, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20525994896888733, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11628666520118713, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11125268787145615, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12949486076831818, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11913140118122101, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11732568591833115, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10410089045763016, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09946314245462418, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06631690263748169, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05722323805093765, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05593584477901459, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05563706159591675, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03326296806335449, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028942126780748367, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028619840741157532, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02646755799651146, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.026279639452695847, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01770423725247383, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017610803246498108, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017273904755711555, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011972514912486076, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21304525434970856, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19178789854049683, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18327634036540985, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1642586588859558, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09811364114284515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08974999189376831, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11621700972318649, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10549409687519073, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10067223012447357, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08474992215633392, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08109413832426071, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05936891958117485, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.050631340593099594, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04732298478484154, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.046505194157361984, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029866743832826614, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024911455810070038, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.024513499811291695, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02230704203248024, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021795252338051796, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016142074018716812, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01618196815252304, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015034484677016735, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011514715850353241, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.074038565158844, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06553629040718079, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.060518618673086166, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.05349210649728775, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0341249518096447, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.02998236194252968, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04308468848466873, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03941451013088226, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.035029374063014984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.028674323111772537, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.027432451024651527, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.021878225728869438, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.01882920414209366, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.016480902209877968, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.01588302105665207, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010963949374854565, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.008638854138553143, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.008206360973417759, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0075634135864675045, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.007171918172389269, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005757412873208523, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005761594045907259, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0049388213083148, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0038797801826149225, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.06627579778432846, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05759117007255554, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.052308619022369385, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.046006619930267334, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.030184181407094002, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.025835860520601273, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.03909878432750702, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.035609643906354904, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.031194616109132767, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02495206892490387, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.023960845544934273, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.019860809668898582, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.017014186829328537, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.014548170380294323, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.013915235176682472, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009948202408850193, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.007590270601212978, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.007129414938390255, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006546585354954004, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0061218529008328915, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005171206779778004, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005087406374514103, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.004293059464544058, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0033178499434143305, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.18960613012313843, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16903452575206757, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1610182672739029, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.14169713854789734, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08798688650131226, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0799580067396164, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10394104570150375, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09448288381099701, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08999195694923401, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07385088503360748, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0697401836514473, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0530192032456398, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04527506232261658, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04228612408041954, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.041554052382707596, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.026489172130823135, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.021725919097661972, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.021088503301143646, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01883508265018463, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01835498958826065, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013669688254594803, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013410399667918682, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01249117311090231, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008668226189911366, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.20540349185466766, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18710359930992126, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1798321008682251, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.15810038149356842, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09633497893810272, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08903196454048157, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1126752644777298, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10233273357152939, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09795583784580231, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08228336274623871, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07756678760051727, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05797543004155159, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04934670776128769, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04660648852586746, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04593852907419205, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029049981385469437, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.024483121931552887, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023914940655231476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021707523614168167, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02127746492624283, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015404862351715565, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01577438972890377, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01451342087239027, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01121283508837223, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17224575579166412, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1619013100862503, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15840058028697968, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14376792311668396, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08161541819572449, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07794618606567383, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09128625690937042, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08398889005184174, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0823531225323677, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0729789286851883, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06975962221622467, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04670054465532303, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.040322255343198776, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.039255980402231216, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03899772837758064, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02339850552380085, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02040065824985504, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02014812082052231, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018658800050616264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018501605838537216, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012443777173757553, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012573452666401863, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012080347165465355, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008673361502587795, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24135351181030273, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.227128803730011, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22240985929965973, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20201261341571808, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11453607678413391, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10947826504707336, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1277228742837906, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11748623847961426, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11557534337043762, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10244280099868774, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09784458577632904, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06529528647661209, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05634771287441254, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05501413345336914, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05470595881342888, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03269248828291893, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028323737904429436, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.0279903095215559, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025845041498541832, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.0256486926227808, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01720031350851059, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01705731265246868, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01674705371260643, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011333904229104519, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21719498932361603, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1964407116174698, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18826232850551605, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16786135733127594, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10023327171802521, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09220444411039352, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11765298247337341, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10704556107521057, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10265589505434036, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0866365134716034, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08237504214048386, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06016233563423157, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05151059105992317, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04845895990729332, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.047708023339509964, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030396005138754845, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025745993480086327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025378016754984856, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023130416870117188, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.022661956027150154, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01671852171421051, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01695963367819786, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015718631446361542, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012424970045685768, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.09517188370227814, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08461279422044754, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07850134372711182, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.06936830282211304, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04394609108567238, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.03883737325668335, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05541246011853218, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.050298940390348434, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04502677544951439, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03706563264131546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0354919359087944, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02820482850074768, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.024145109578967094, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.021296173334121704, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.020577501505613327, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014181503094732761, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.011342518031597137, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.010834941640496254, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010018869303166866, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.009565534070134163, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007571290712803602, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007744689006358385, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.006558975670486689, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005531221628189087, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08318299055099487, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07314909249544144, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06701867282390594, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.059032898396253586, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03810567036271095, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03301380202174187, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.048964399844408035, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04455339536070824, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03919488191604614, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.031847622245550156, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.030590953305363655, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.024910183623433113, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.021300824359059334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01836010254919529, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017620379105210304, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012489321641623974, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.009579574689269066, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009033167734742165, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008348152972757816, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.007842461578547955, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006500049494206905, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006400896701961756, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005460354965180159, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004196523688733578, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.19856905937194824, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17997035384178162, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17312023043632507, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1525103896856308, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0928400531411171, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08590266108512878, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1068456843495369, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09790404140949249, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09441084414720535, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07877679169178009, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07408158481121063, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05432078242301941, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04678201675415039, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04448894038796425, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04394536465406418, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027111783623695374, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02259121835231781, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.022073594853281975, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01964123733341694, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.0192802082747221, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01387289073318243, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013397667557001114, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013016646727919579, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008179263211786747, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.20371215045452118, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18316438794136047, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17495179176330566, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1526411920785904, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09557820111513138, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08757194131612778, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11191125959157944, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1019691452383995, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09732487052679062, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0796613022685051, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07539287209510803, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05754103511571884, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04911338910460472, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.046160709112882614, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04543784260749817, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02879364974796772, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.024074813351035118, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02346820943057537, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020890971645712852, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.020424261689186096, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015158380381762981, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015322624705731869, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014170492067933083, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010594764724373817, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1696627140045166, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15896768867969513, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1552855223417282, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1407674252986908, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08034353703260422, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0764881819486618, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.090049609541893, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08280954509973526, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08115306496620178, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07156296819448471, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06840084493160248, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04612066224217415, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03982474282383919, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.038713857531547546, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0384504608809948, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023158762603998184, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020234623923897743, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019980663433670998, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01848628558218479, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018323823809623718, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012408536858856678, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012662725523114204, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012035772204399109, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008943306282162666, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2353329360485077, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22076860070228577, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21585986018180847, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19580364227294922, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11157496273517609, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10634195059537888, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12469895929098129, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11458714306354523, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11266400665044785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09937041252851486, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09506319463253021, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06384989619255066, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05503213405609131, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.053653959184885025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.053319402039051056, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03204981982707977, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02778116799890995, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027437077835202217, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02531701885163784, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02511373907327652, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01708190143108368, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016966886818408966, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01661401055753231, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011560757644474506, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22263497114181519, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20088443160057068, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1923971027135849, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17110668122768402, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10274413973093033, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09428933262825012, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12080124765634537, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10983000695705414, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10520501434803009, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08848089724779129, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08380606770515442, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06141052395105362, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05274571478366852, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04953913390636444, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.048761751502752304, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03089781105518341, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02607247419655323, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025683844462037086, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02330079674720764, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.022799266502261162, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01667146384716034, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016886897385120392, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01560292113572359, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01200506929308176, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.08393171429634094, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07505049556493759, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.06999325752258301, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.06139393150806427, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.03894718736410141, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.03468405455350876, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.048154208809137344, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.044105395674705505, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.03975323587656021, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03270396217703819, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.031055709347128868, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02447563409805298, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.021102355793118477, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.018781127408146858, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.018200911581516266, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012269623577594757, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.009829974733293056, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.009397636167705059, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.008600525557994843, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.008218439295887947, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006446275394409895, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006478949449956417, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.005639456678181887, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004381381440907717, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.07742268592119217, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06845533847808838, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06357160210609436, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.055493079125881195, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.03554588556289673, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.031393587589263916, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.044554900377988815, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04046444222331047, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03643776848912239, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02962629124522209, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.028170621022582054, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.022587383165955544, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01932452991604805, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.017106054350733757, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.016538506373763084, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0112953782081604, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.00884407851845026, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.008425096049904823, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007649581413716078, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.007274645380675793, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0058523607440292835, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005721792578697205, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005024222191423178, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0036750384606420994, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.18691526353359222, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16819357872009277, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16114231944084167, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1410137414932251, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.08738869428634644, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08021152019500732, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10187017917633057, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0924951359629631, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.08896688371896744, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07322871685028076, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.06882002204656601, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0519498810172081, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04428263008594513, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.041916608810424805, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04137193039059639, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02594599686563015, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02143796533346176, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.020890677347779274, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018523378297686577, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.01814170926809311, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013369977474212646, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012945874594151974, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01241512130945921, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008182226680219173, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.20309017598628998, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1834375411272049, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17508631944656372, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.15353836119174957, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0948585718870163, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08689815551042557, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11238095164299011, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10197339951992035, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09655055403709412, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08011981844902039, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07600995898246765, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.057905152440071106, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04921711981296539, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04595429450273514, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04515860602259636, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029104197397828102, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02434161677956581, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023714402690529823, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02148466743528843, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02097354084253311, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01572168432176113, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016042085364460945, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014660346321761608, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011637135408818722, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16950641572475433, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15798845887184143, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15387758612632751, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13902968168258667, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08007554709911346, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07588233053684235, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09032129496335983, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08299194276332855, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08101397007703781, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07091766595840454, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06760681420564651, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04621973633766174, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03989802300930023, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.0385873056948185, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03827787935733795, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02320573851466179, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02019195258617401, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019900618121027946, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01836569793522358, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018174324184656143, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012419478967785835, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012691842392086983, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011979002505540848, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008975637145340443, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2298010140657425, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2144504338502884, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2092389464378357, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.18892627954483032, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10863033682107925, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10306163877248764, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12185511738061905, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11211801320314407, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1098717525601387, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0961858481168747, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09158548712730408, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06228136643767357, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.053771235048770905, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05217870697379112, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05180886760354042, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031182583421468735, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.026929466053843498, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026547688990831375, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024395408108830452, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02415633760392666, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016370408236980438, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016370903700590134, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015812449157238007, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010967510752379894, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2354864925146103, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21317961812019348, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20472057163715363, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18152977526187897, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10914544016122818, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10060641914606094, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12788309156894684, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11597128212451935, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11162042617797852, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09404470026493073, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08894719183444977, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06545676290988922, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.055789243429899216, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05271897837519646, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05196051299571991, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03296603634953499, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027875090017914772, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027500668540596962, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02494918741285801, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024469049647450447, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017919423058629036, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018160153180360794, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016875453293323517, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01313790027052164, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.09784846752882004, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08714821189641953, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08130165189504623, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0711311399936676, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.045534778386354446, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.040521640330553055, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.056232746690511703, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0512208454310894, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.046500202268362045, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03803909569978714, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03610457107424736, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.028607770800590515, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02458801120519638, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.022038014605641365, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02140021324157715, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014367709867656231, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.011660163290798664, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011179155670106411, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010202042758464813, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.009790685027837753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007625482976436615, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007807052694261074, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.006732278037816286, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005499145947396755, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08366907387971878, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07409626990556717, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06809251755475998, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.059418898075819016, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.038556355983018875, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03373530134558678, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.049072541296482086, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04470282420516014, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.03942452371120453, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.032047905027866364, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.030604494735598564, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.024985291063785553, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.021347269415855408, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.018582122400403023, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017899226397275925, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012472085654735565, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.00971852708607912, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009200895205140114, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008442026562988758, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.007977893576025963, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006545293144881725, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006485326681286097, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005575304850935936, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004317851271480322, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20245057344436646, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18016932904720306, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1716720312833786, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.14916986227035522, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0941910371184349, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0855548083782196, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11113234609365463, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10043890029191971, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.09606320410966873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07791279256343842, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0729936808347702, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.056659746915102005, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04804326593875885, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.04516096040606499, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04447411745786667, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.028364934027194977, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.023119835183024406, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0224587582051754, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019771605730056763, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.019303614273667336, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014618952758610249, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01406053826212883, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013355806469917297, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008896543644368649, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.20401598513126373, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18407049775123596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.16908767819404602, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14934299886226654, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09605430066585541, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.084228515625, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12639625370502472, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11347153782844543, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0977824404835701, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08174189180135727, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0791897252202034, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06552208214998245, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05501925200223923, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04692504554986954, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04484107345342636, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0328499935567379, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.025314144790172577, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02394971251487732, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022607645019888878, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021303987130522728, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017673304304480553, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01798536814749241, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014962214045226574, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013094013556838036, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17122673988342285, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15915635228157043, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15480679273605347, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1397145837545395, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08096978068351746, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0764944776892662, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0917956605553627, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08417666703462601, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08194903284311295, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07146643102169037, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06823710352182388, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.047104980796575546, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04058201611042023, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03914420306682587, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03879933804273605, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02367425337433815, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02066514641046524, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020349275320768356, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018805112689733505, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018594548106193542, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012820898555219173, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013268773443996906, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012320957146584988, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009650635533034801, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23167279362678528, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21561482548713684, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21011582016944885, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1895415335893631, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10937478393316269, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10357153415679932, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12322493642568588, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11318297684192657, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1106467992067337, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09654911607503891, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09193675965070724, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06281998753547668, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.054267365485429764, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.052534304559230804, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0521274134516716, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03146304562687874, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027070753276348114, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026663675904273987, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024458257481455803, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02419765293598175, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016440462321043015, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016405973583459854, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015818128362298012, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010890171863138676, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2350761443376541, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2130195051431656, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20469136536121368, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18174214661121368, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10915674269199371, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10075971484184265, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12778858840465546, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11575864255428314, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.1116328164935112, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0941653847694397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08914725482463837, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06538465619087219, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05573698505759239, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05271421745419502, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.051972150802612305, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03295387327671051, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027815580368041992, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027464674785733223, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024892732501029968, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024433713406324387, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017790399491786957, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01802965998649597, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016740363091230392, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01297715213149786, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10213448852300644, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09059461951255798, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08353947103023529, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07320556789636612, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04742788150906563, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04164135456085205, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.061232246458530426, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.054716985672712326, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04855695739388466, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.039641398936510086, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03820360451936722, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03133053705096245, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.026379799470305443, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.023056039586663246, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02221721224486828, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015814727172255516, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012349138967692852, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011757923290133476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0108657106757164, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010327326133847237, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008496139198541641, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008551104925572872, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00724857859313488, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006168616935610771, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08861259371042252, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07803276926279068, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07153164595365524, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0625058114528656, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.040772341191768646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03547239303588867, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05287008360028267, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04739425331354141, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.041820719838142395, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03386466205120087, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.032432060688734055, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02687576785683632, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.022749416530132294, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.019716627895832062, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.018952764570713043, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013518748804926872, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010389360599219799, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.00984729453921318, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009040518663823605, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008535055443644524, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007104703225195408, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0070512262172997, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005986359901726246, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00483987620100379, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2013481706380844, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1808604598045349, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17261047661304474, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1512441784143448, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.09441953152418137, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0861954465508461, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11156808584928513, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10088842362165451, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0962187647819519, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07913628220558167, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07452177256345749, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05689484253525734, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04833357036113739, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.045350294560194016, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.044637564569711685, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02848188765347004, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.023225676268339157, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.022564580664038658, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02007279358804226, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.019588584080338478, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014699573628604412, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014180702157318592, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013495882041752338, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008990817703306675, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.21801051497459412, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19381055235862732, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18433310091495514, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16219227015972137, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10190075635910034, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09219647198915482, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12069381028413773, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10921867936849594, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10420414805412292, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08518797159194946, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08042462915182114, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.062328748404979706, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.052858591079711914, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04953298717737198, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.048723254352808, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03136371076107025, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.026287196204066277, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.025538107380270958, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023017579689621925, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022508621215820312, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016937723383307457, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017335345968604088, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015823574736714363, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012679843232035637, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17652691900730133, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16388799250125885, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1593734323978424, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1438368707895279, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08363724499940872, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07892744243144989, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09487782418727875, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0867752656340599, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08464326709508896, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07361556589603424, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07044187188148499, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04873049631714821, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.041925620287656784, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.0405149981379509, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04017140343785286, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024557579308748245, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.021535150706768036, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.021219156682491302, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019584089517593384, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.019381146878004074, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01349694561213255, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014004390686750412, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.013037246651947498, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010403469204902649, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.23999305069446564, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22306343913078308, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21734552085399628, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19587664306163788, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11358141899108887, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10745662450790405, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12756726145744324, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11725690215826035, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11492633074522018, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09992539882659912, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09511265158653259, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06522481888532639, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05627955123782158, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.054611045867204666, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.054216042160987854, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03265160322189331, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028263237327337265, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02785031497478485, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025490660220384598, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025240223854780197, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01719280704855919, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017288606613874435, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016627898439764977, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011713051237165928, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23746900260448456, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21405449509620667, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20410244166851044, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18061111867427826, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10996027290821075, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10051450878381729, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13110269606113434, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11881706118583679, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11268401890993118, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09438532590866089, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08923493325710297, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06683694571256638, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05716240406036377, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05321096256375313, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.0522356815636158, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03376217186450958, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028276832774281502, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.027786383405327797, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02527434006333351, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024654366075992584, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01840941049158573, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01872275210916996, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01708182692527771, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013625477440655231, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10513971000909805, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09437047690153122, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08894384652376175, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07805503159761429, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.049004800617694855, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.044231805950403214, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.059319738298654556, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05410514399409294, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04991304501891136, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.041197240352630615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.039057549089193344, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.030163966119289398, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.025879420340061188, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02359704114496708, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.023036081343889236, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015118499286472797, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012289994396269321, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011846402660012245, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010727720335125923, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010357162915170193, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007913961075246334, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007897896692156792, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007081018760800362, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00530292559415102, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08859239518642426, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07945521175861359, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07418045401573181, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06507952511310577, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04111255332827568, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03665520250797272, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05097787827253342, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04648109897971153, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04191294685006142, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.034609127789735794, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.032905805855989456, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.025883397087454796, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.022250279784202576, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.019773833453655243, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019170965999364853, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012946114875376225, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010278001427650452, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009826304391026497, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008976303972303867, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00857001543045044, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0067524113692343235, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006677622441202402, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005901159718632698, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004390939138829708, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.21684008836746216, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1946183145046234, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18661653995513916, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1631905436515808, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10123535990715027, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09285802394151688, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11813905090093613, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10693691670894623, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10308261215686798, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08461876958608627, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0795077234506607, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0602823905646801, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.051144931465387344, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.048528850078582764, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.047886885702610016, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.030113428831100464, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.024677211418747902, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.024043619632720947, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.021232549101114273, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02079475112259388, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015463806688785553, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014703995548188686, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014264584518969059, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00899563916027546, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2356099784374237, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21561990678310394, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20742575824260712, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.18594995141029358, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11040361225605011, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10246383398771286, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12888050079345703, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11746399104595184, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11213745921850204, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09564045071601868, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.09182599186897278, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06639424711465836, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05670878663659096, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05352819338440895, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0527530163526535, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.033280789852142334, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.028375007212162018, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.027734750881791115, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.025472469627857208, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02500554732978344, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01788804680109024, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01859644055366516, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01687384769320488, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013582265004515648, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17821051180362701, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16550152003765106, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16080768406391144, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14500711858272552, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08448494970798492, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07970091700553894, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09584636241197586, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08792133629322052, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08555286377668381, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07436776161193848, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.07102954387664795, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04926975816488266, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.042390502989292145, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.040847767144441605, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.040481917560100555, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02472461573779583, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.021580331027507782, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.021235981956124306, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01958940550684929, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.019360356032848358, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013343929313123226, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01387135498225689, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012815498746931553, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010091893374919891, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2449411153793335, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22777880728244781, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22184553742408752, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20002630352973938, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.1160285621881485, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10973817110061646, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1305703967809677, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1198972761631012, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11737370491027832, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1020795926451683, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09723819047212601, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06680846959352493, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.057540107518434525, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05578255653381348, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05536223202943802, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03348810225725174, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02884039655327797, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02840460278093815, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026016419753432274, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025751786306500435, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017714694142341614, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017606979236006737, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01710336282849312, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011874868534505367, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.24976953864097595, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22455178201198578, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.2150554209947586, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.18983623385429382, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.11572489142417908, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.10608524084091187, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.13604633510112762, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.12329156696796417, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11865974962711334, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09883805364370346, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.09328248351812363, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06963835656642914, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0592951774597168, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.055838435888290405, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05500001087784767, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03507917374372482, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.029352087527513504, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02895352616906166, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02603643201291561, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02550412528216839, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01891564391553402, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01892268657684326, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017761727795004845, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01343963947147131, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1076059490442276, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09684832394123077, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09047286957502365, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07975132763385773, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.050171416252851486, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.044930845499038696, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06256786733865738, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05675194412469864, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05115475878119469, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04253876581788063, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0406828410923481, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03186258301138878, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.027201196178793907, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024263229221105576, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.023515846580266953, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01595373824238777, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012778133153915405, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012244570069015026, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011280562728643417, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010806883685290813, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008418255485594273, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008511403575539589, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.0073488433845341206, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005898963660001755, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08905353397130966, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08051663637161255, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07362667471170425, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06505882740020752, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04131889343261719, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.036224961280822754, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05414384603500366, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04921858385205269, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04213527590036392, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03535354509949684, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.034009505063295364, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.027407895773649216, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.023517848923802376, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01994926482439041, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01904025487601757, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013726616278290749, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010440610349178314, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009825076907873154, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009260773658752441, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00867227278649807, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007182123139500618, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007079294417053461, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005976088810712099, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004682786762714386, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22829243540763855, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20567598938941956, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19695407152175903, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1726304143667221, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10687625408172607, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09795934706926346, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12685547769069672, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11363246291875839, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1088341474533081, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08996102958917618, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08487183600664139, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06496606022119522, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05446653813123703, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05130676180124283, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05054136738181114, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.032537199556827545, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.026333078742027283, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02564692497253418, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022859662771224976, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022347306832671165, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01680324412882328, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016068018972873688, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015273471362888813, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010291450656950474, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.250342458486557, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21895140409469604, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20610927045345306, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.18208758533000946, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11674325913190842, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10435302555561066, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.14101754128932953, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12751898169517517, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.12014450132846832, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09582962840795517, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.09239934384822845, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.07277754694223404, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.061747923493385315, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.056744642555713654, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05549295246601105, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03647863492369652, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.030136888846755028, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.0291903056204319, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.026061702519655228, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.025257492437958717, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.019496966153383255, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.02008783258497715, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017880186438560486, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.014532292261719704, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16754364967346191, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1558711677789688, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1511264592409134, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1366448998451233, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07953861355781555, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0749373659491539, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0909559577703476, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0833430141210556, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08055821061134338, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07028532773256302, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06720509380102158, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04672284796833992, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04027493670582771, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.038560137152671814, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.038154806941747665, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023473886772990227, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020502427592873573, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.020154139026999474, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018697086721658707, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01845015212893486, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012704378925263882, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013398195616900921, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.012115539982914925, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009908055886626244, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24120986461639404, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2250756323337555, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21918590366840363, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19801142811775208, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11454339325428009, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10846084356307983, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12936441600322723, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11875104904174805, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11584048718214035, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10131033509969711, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09657444804906845, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0662502869963646, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.057070426642894745, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05516839399933815, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.054709140211343765, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.033229753375053406, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028678802773356438, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02824511006474495, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02600705623626709, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02572290413081646, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01766957901418209, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017759574577212334, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017000364139676094, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012236660346388817, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2367805391550064, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21233344078063965, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20221582055091858, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1791759431362152, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10950534045696259, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0997038260102272, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1306614875793457, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11826014518737793, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11231815069913864, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09364253282546997, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08888327330350876, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06708041578531265, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05707594007253647, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05323224887251854, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05220015347003937, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03411172330379486, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028582356870174408, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.028062326833605766, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025575485080480576, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02492874674499035, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01890270970761776, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.019314102828502655, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017424853518605232, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.014419695362448692, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11876166611909866, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1078731119632721, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10129885375499725, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08985564112663269, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05555013194680214, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.050173692405223846, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06921203434467316, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.062357597053050995, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05652579665184021, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0476093590259552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0456729456782341, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03532753512263298, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02997812069952488, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02692447230219841, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.026140904054045677, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017827102914452553, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014293906278908253, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013750864192843437, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012759357690811157, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012277184054255486, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009537521749734879, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009624185971915722, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.00837937742471695, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006875487044453621, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09303899109363556, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08445209264755249, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07685982435941696, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06830835342407227, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04306566342711449, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03755176439881325, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.057691723108291626, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.052041638642549515, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.043905001133680344, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.037207864224910736, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.036174479871988297, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02933778241276741, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.024900216609239578, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.020808294415473938, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01972270756959915, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014727429486811161, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010913599282503128, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010203522630035877, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00976427923887968, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009067906998097897, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007691935170441866, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007500643841922283, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006241402588784695, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004939321428537369, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2448582649230957, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22464102506637573, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21628810465335846, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.19344162940979004, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11576292663812637, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10760264843702316, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13545823097229004, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12260139733552933, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11748502403497696, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10016150027513504, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09531610459089279, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06926850229501724, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05878204479813576, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05563093349337578, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05485650151968002, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03464875370264053, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.028410930186510086, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02775639481842518, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02518652379512787, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.024705545976758003, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017835984006524086, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01715998165309429, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01652299240231514, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010818061418831348, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.22435453534126282, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20319902896881104, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19524051249027252, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16788014769554138, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10495779663324356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09697995334863663, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1219840869307518, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11064781993627548, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10660959780216217, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08756859600543976, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0809841975569725, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06271379441022873, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05318363383412361, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05059211701154709, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04995480179786682, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0314260870218277, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.026415174826979637, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.025833066552877426, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02292604371905327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.022509604692459106, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016707757487893105, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016694501042366028, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015852361917495728, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011616753414273262, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16143091022968292, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15068979561328888, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14652875065803528, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13276489078998566, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07670798897743225, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07256390899419785, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08734370768070221, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08006212115287781, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07759121805429459, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06813827157020569, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06525376439094543, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04488098993897438, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.038733117282390594, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.037217848002910614, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03685402125120163, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022613411769270897, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019867388531565666, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01955341175198555, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018210187554359436, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017994387075304985, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012406590394675732, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013060728088021278, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011904051527380943, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009778447449207306, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24232935905456543, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22706659138202667, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2216634303331375, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20075193047523499, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11496145278215408, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10933419317007065, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1292407214641571, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11879931390285492, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11616716533899307, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10228622704744339, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09769773483276367, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06600011140108109, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05696320906281471, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05522892251610756, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05481784790754318, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03305591642856598, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028423096984624863, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028011905029416084, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02582731284201145, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025566287338733673, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017347116023302078, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017148606479167938, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0167438592761755, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01131405495107174, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23288646340370178, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2083623856306076, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19864952564239502, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1766173094511032, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10727722197771072, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09755839407444, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1271996945142746, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11557745933532715, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11020911484956741, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09174589067697525, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08707712590694427, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06494543701410294, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05556244030594826, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.0517725944519043, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.050842929631471634, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03280434384942055, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027303040027618408, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02684909477829933, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0242730975151062, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02367960289120674, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01790272258222103, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.0178084634244442, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016662782058119774, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012682020664215088, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11749160289764404, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10734041035175323, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10174067318439484, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09046624600887299, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05487602576613426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.050165191292762756, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06664568930864334, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060691121965646744, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.055751118808984756, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.047367971390485764, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.045284681022167206, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03393211215734482, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0290362611413002, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026445850729942322, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.025797484442591667, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017019538208842278, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013782193884253502, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01330993790179491, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012275159358978271, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01186760701239109, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008953609503805637, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008885313756763935, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007997574284672737, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005998270120471716, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.0960555449128151, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08791918307542801, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08099082857370377, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07228048890829086, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.044497258961200714, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03954564779996872, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05750785768032074, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05276926979422569, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04523293301463127, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0388067290186882, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03737920522689819, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02906939759850502, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.025133857503533363, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.021437114104628563, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.020484989508986473, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014548677951097488, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011138261295855045, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.0104827880859375, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010005910880863667, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009367885999381542, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00759792048484087, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0074445647187530994, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006336831953376532, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004740586970001459, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2494805008172989, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2308291494846344, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2235892117023468, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.20057313144207, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11803842335939407, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.11069167405366898, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13717518746852875, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1239108145236969, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11958770453929901, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10319438576698303, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0983722135424614, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0701889768242836, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0592910535633564, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05655841529369354, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.055900800973176956, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.035121042281389236, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.028784366324543953, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.028203148394823074, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025687208399176598, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.025270257145166397, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01802637241780758, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017092958092689514, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016763074323534966, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01052304171025753, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23588827252388, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21288105845451355, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.2050902396440506, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.18281103670597076, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11024836450815201, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.10142137855291367, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12679539620876312, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11548075824975967, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11201182007789612, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09362271428108215, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0888381153345108, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06527624279260635, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.055926356464624405, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05340660735964775, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05282431095838547, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03303160145878792, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02836415357887745, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.027758074924349785, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.025130819529294968, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02473653480410576, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018131455406546593, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018525779247283936, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017342954874038696, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013646447099745274, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15103912353515625, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.141261026263237, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13713911175727844, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1243763417005539, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07168939709663391, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06780125945806503, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08200516551733017, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07518062740564346, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07252823561429977, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06381575018167496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06116724759340286, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.042073749005794525, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.036319248378276825, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.034749023616313934, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03436961770057678, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021164720878005028, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018477298319339752, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01816784217953682, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01694299653172493, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016716646030545235, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011496597900986671, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01207708939909935, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010973275639116764, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008933640085160732, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2346256673336029, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22033573687076569, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21506942808628082, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1949794888496399, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11130104213953018, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10594303160905838, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12532274425029755, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1152266412973404, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11242274940013885, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09929678589105606, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.094941645860672, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06402302533388138, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0552518293261528, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.053480301052331924, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.053054079413414, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032060202211141586, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027526676654815674, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.027124958112835884, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025058358907699585, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024793170392513275, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01683349348604679, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016629904508590698, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016221309080719948, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010967619717121124, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22547964751720428, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20070268213748932, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19048675894737244, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1692795604467392, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10357583314180374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09357380867004395, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12364979088306427, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11243081837892532, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10661153495311737, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08805368095636368, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08376017212867737, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0632084310054779, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0540514811873436, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05001859366893768, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04900289699435234, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03197226673364639, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.026397421956062317, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025896091014146805, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023366976529359818, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.022718248888850212, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01741267368197441, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01730630174279213, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016063779592514038, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012298570945858955, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.09701310843229294, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.0888449102640152, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08315403759479523, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07460622489452362, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04521142691373825, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04080883041024208, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.056673742830753326, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05173172801733017, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.04600517079234123, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03947287052869797, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.03803453966975212, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0288411732763052, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02474834956228733, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.021838266402482986, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.021099813282489777, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014459461905062199, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01148480549454689, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01099014189094305, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010361158289015293, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.009897826239466667, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0075899893417954445, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007674855645745993, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.006546099670231342, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005261525046080351, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.08468813449144363, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07717020064592361, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07065802067518234, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0634436160326004, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.039068225771188736, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.034369759261608124, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05171135440468788, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04700183495879173, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0398905985057354, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03418683633208275, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03321211785078049, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.026149354875087738, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.022423511371016502, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.01886596344411373, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.017929397523403168, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013080855831503868, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.009870411828160286, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.009265623986721039, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008907929062843323, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.008314847946166992, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006816905923187733, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006728133652359247, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.005577662028372288, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004414930008351803, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.20597943663597107, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1863701045513153, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17612795531749725, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.15835091471672058, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0956198200583458, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08658165484666824, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11868569999933243, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10606677830219269, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0977596864104271, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0828276127576828, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.07956510782241821, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06062024459242821, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05072781443595886, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.045864757150411606, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04466448351740837, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.030315563082695007, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02357446774840355, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.022662179544568062, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020962746813893318, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02016686089336872, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01555757224559784, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014837540686130524, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.013445411808788776, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00933134090155363, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23170408606529236, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20880922675132751, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20008189976215363, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1754685789346695, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.1082438975572586, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09923739731311798, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12714950740337372, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1151791661977768, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11030915379524231, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.091654472053051, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0858251079916954, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06546130031347275, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05566754192113876, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05251360312104225, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.051746685057878494, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03292414918541908, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02790418639779091, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.027198368683457375, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024681508541107178, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.024200066924095154, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017808755859732628, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01837793178856373, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.016778869554400444, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013508904725313187, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15771761536598206, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14739055931568146, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1432320773601532, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1300385296344757, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07467998564243317, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07067394256591797, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08525450527667999, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07824219763278961, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07557831704616547, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06661993265151978, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0638122409582138, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.043666139245033264, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.037752918899059296, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03615506365895271, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03577355667948723, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021968891844153404, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019183145835995674, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01886090636253357, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017611723393201828, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017381098121404648, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011928707361221313, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012484702281653881, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011390242725610733, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009183771908283234, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24180394411087036, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22714382410049438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22176003456115723, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20125515758991241, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11452978104352951, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1091206893324852, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12888824939727783, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.118504598736763, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11567758768796921, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10236813127994537, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09779099375009537, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06585129350423813, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.056873321533203125, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.055070698261260986, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05465089902281761, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03307652100920677, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.028539303690195084, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028138592839241028, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02607870101928711, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02581292763352394, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017604108899831772, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017522336915135384, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016990963369607925, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011924130842089653, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23824597895145416, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21088972687721252, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19978748261928558, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17659392952919006, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10947985202074051, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09855187684297562, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1319633573293686, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11898907274007797, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.11295049637556076, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09251723438501358, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08775095641613007, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0673690140247345, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.057287923991680145, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.0529690645635128, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.05188596993684769, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03405778482556343, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.028118222951889038, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02759735845029354, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024837912991642952, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.024150772020220757, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018667442724108696, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018651004880666733, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.017181668430566788, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01349376980215311, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11271198838949203, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10377741605043411, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09779992699623108, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0876414030790329, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05277024582028389, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04809168353676796, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0658673346042633, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05962727591395378, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05360694229602814, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04627222195267677, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04456841200590134, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.033672869205474854, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028664251789450645, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02557607740163803, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.024812856689095497, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016936995089054108, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013600688427686691, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013083131983876228, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012339436449110508, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011864544823765755, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009044334292411804, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009204842150211334, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007917998358607292, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006572572514414787, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09560883045196533, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08689272403717041, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07871992141008377, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07080943882465363, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04413821920752525, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03832261264324188, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05936971679329872, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05422178655862808, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04512195289134979, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.038637857884168625, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03770981729030609, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03004121221601963, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.025886792689561844, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02131287194788456, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.020095955580472946, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01504101138561964, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011176380328834057, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.01037950161844492, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010098039172589779, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00930392649024725, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007852853275835514, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007743765600025654, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006293707061558962, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005008520558476448, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2266918569803238, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20577819645404816, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1953897625207901, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17675118148326874, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10607346147298813, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09666803479194641, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13071881234645844, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11637565493583679, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10839439928531647, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09214523434638977, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08884671330451965, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06717687100172043, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05583100765943527, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.050978850573301315, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04979608952999115, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03365231677889824, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02613971382379532, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02521435357630253, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023275161162018776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02250049076974392, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01734207384288311, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016248542815446854, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015141147188842297, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010195969603955746, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2068164348602295, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18390291929244995, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17555847764015198, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1503261923789978, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09566208720207214, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0870688259601593, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11224916577339172, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10187555104494095, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09750711172819138, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07869106531143188, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07401660829782486, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0578446127474308, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0494869127869606, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04673457145690918, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04604298621416092, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02930370904505253, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.025377310812473297, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.024747338145971298, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02214246243238449, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021738290786743164, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016354216262698174, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017333589494228363, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015481121838092804, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013419552706182003, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.14716748893260956, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1375143826007843, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13340488076210022, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12108345329761505, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0696563869714737, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06575972586870193, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07972589135169983, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07331521809101105, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07051347196102142, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06208811327815056, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.059461746364831924, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04081733152270317, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.035314254462718964, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.033674925565719604, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03328178450465202, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020491812378168106, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01776430942118168, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.017445195466279984, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016276242211461067, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016035640612244606, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011022169142961502, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011448008008301258, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010470874607563019, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008260181173682213, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.230839341878891, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21663017570972443, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21132217347621918, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19188332557678223, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10929896682500839, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10395897924900055, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12336335331201553, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11343906819820404, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11042678356170654, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0976872444152832, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09334658086299896, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06297940015792847, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05441930517554283, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.052535343915224075, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.052082691341638565, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03159616142511368, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027139537036418915, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026728300377726555, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02478322759270668, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02449968084692955, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01671142317354679, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016566656529903412, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016060562804341316, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01108416635543108, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22723548114299774, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19993165135383606, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18830852210521698, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1661524623632431, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10414446890354156, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0929853618144989, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1269676387310028, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1144927442073822, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10759903490543365, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08758021891117096, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08330981433391571, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0650353655219078, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05526638776063919, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.050497524440288544, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.0493050292134285, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.033022500574588776, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.027028679847717285, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02643866464495659, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02387615293264389, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023116668686270714, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018312867730855942, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018304158002138138, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016723787412047386, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013461729511618614, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11407977342605591, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10541316121816635, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09916649758815765, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08945292234420776, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05338190868496895, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04859086126089096, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06680897623300552, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06081352382898331, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.054233383387327194, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.047181762754917145, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04567400738596916, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03409223258495331, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029119525104761124, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025787202641367912, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.024950852617621422, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017104946076869965, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013574754819273949, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013020324520766735, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0123591935262084, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011838681064546108, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009091852232813835, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00905389804393053, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007919562980532646, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006234162487089634, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09310220181941986, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08551493287086487, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07703717797994614, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06968769431114197, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04301534965634346, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03732913359999657, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05875159054994583, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05388110131025314, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04385603591799736, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.038116175681352615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03736985847353935, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02980734594166279, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02561074122786522, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.020815899595618248, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019512711092829704, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014930023811757565, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.010930588468909264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010111016221344471, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00997418537735939, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009159009903669357, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007840286940336227, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007656737696379423, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006199046969413757, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0049715605564415455, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23566286265850067, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21591965854167938, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2067124843597412, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.18691284954547882, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11048603802919388, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10193748772144318, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13314580917358398, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11930137872695923, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11252519488334656, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09666657447814941, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09287882596254349, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06817846745252609, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0571473054587841, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05300746485590935, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05200693756341934, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03412405401468277, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02702120691537857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02623380534350872, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.024183789268136024, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.023523855954408646, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01746411807835102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01644989661872387, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015540516003966331, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010082983411848545, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2187037467956543, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19835539162158966, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19121040403842926, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1670026183128357, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10220380872488022, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09439226984977722, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11849694699048996, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10688106715679169, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.1036917120218277, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08623555302619934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08125124871730804, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.061076343059539795, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05199682340025902, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04987335950136185, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0493505485355854, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.030874546617269516, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027065392583608627, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026574239134788513, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024087505415081978, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0237777940928936, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017167873680591583, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018367059528827667, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01647518202662468, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01430017314851284, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.14334601163864136, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13400039076805115, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13016793131828308, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.11834368854761124, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06775261461734772, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06407050043344498, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07732218503952026, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07106029242277145, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06856942921876907, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.060482293367385864, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.057961512356996536, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03959014266729355, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03419751301407814, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03270886093378067, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03235252946615219, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.019880082458257675, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01721295900642872, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.016918068751692772, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.015778737142682076, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.015556886792182922, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010735390707850456, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011017805896699429, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010245028883218765, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007887307554483414, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2259766310453415, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21227607131004333, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.206988126039505, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.18822039663791656, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10701120644807816, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10186910629272461, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12087475508451462, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11102186888456345, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.10814476758241653, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09578188508749008, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09179985523223877, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06180591881275177, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.053303468972444534, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.051507823169231415, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05107777938246727, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031068241223692894, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02680560201406479, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026427343487739563, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02455226331949234, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024285344406962395, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01676873117685318, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01664605922996998, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016172433272004128, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011506594717502594, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21341075003147125, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18715646862983704, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17500637471675873, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1536528766155243, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09785882383584976, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08640793710947037, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1208440512418747, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10926884412765503, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10115545988082886, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08177630603313446, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07779954373836517, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.061958424746990204, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05297979712486267, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04760659486055374, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.046269990503787994, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03164858743548393, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025740452110767365, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025049714371562004, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022725561633706093, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021848881617188454, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01783047243952751, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017850907519459724, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.01611265167593956, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013301648199558258, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.114563949406147, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10567355155944824, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0982988029718399, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08919793367385864, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05351206660270691, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04807669669389725, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06924380362033844, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06261074542999268, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0544566810131073, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04746507108211517, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04617548733949661, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.035337530076503754, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030023517087101936, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025942955166101456, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.024898361414670944, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01774795912206173, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01378191914409399, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013114762492477894, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01260819099843502, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011972665786743164, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009451611898839474, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009466121904551983, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007984105497598648, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006642438471317291, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09458857029676437, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08609934896230698, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07585008442401886, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06898406147956848, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.043481096625328064, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03663790971040726, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06196996942162514, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05638426914811134, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.0445336177945137, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0384351946413517, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.038005899637937546, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03133115917444229, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026825351640582085, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.021035557612776756, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.01948699913918972, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015648609027266502, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011113811284303665, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010128917172551155, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010139363817870617, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009146292693912983, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008209094405174255, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008002910763025284, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006205088458955288, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005192013923078775, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23308472335338593, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21078738570213318, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19998441636562347, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1807243525981903, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10831744968891144, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09844287484884262, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13317374885082245, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11919859796762466, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11080434173345566, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0939849317073822, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09063461422920227, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06844930350780487, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.056993044912815094, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.0520051009953022, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05077233538031578, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03433668985962868, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.026620665565133095, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02564438246190548, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023676438257098198, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022852055728435516, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017626818269491196, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016509203240275383, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01525761466473341, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010236315429210663, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23700131475925446, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20623520016670227, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19509510695934296, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1635322868824005, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.11024859547615051, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09807514399290085, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1294783502817154, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11786968261003494, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11264511942863464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08710215985774994, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.08134474605321884, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06670652329921722, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.056946076452732086, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05340130254626274, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.052568577229976654, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03357592597603798, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.028348298743367195, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.027495549991726875, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023863857612013817, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02329135313630104, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018131215125322342, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018659360706806183, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.017017902806401253, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013595882803201675, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1482267528772354, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13918204605579376, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13559162616729736, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1232214868068695, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07015810161828995, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0666951835155487, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0792241021990776, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07301153987646103, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07090355455875397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06276976317167282, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06007279455661774, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04043992981314659, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.035069383680820465, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03378699719905853, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.033485520631074905, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020272959023714066, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.017640912905335426, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.017374303191900253, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016170592978596687, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.015979083254933357, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01080233883112669, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011065533384680748, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010373314842581749, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007741572801023722, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2338140457868576, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22001652419567108, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21507848799228668, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19540660083293915, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11053856462240219, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10553683340549469, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12392736226320267, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11407815665006638, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11163056641817093, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09907212853431702, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09476374089717865, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06312088668346405, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05465066432952881, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.053067032247781754, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0526885911822319, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03170071914792061, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027290740981698036, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026932397857308388, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02494366280734539, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02471095137298107, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016774959862232208, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016436588019132614, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016242681071162224, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0108217503875494, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21179041266441345, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18640698492527008, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1753719449043274, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15444493293762207, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09715536236763, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08651077747344971, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11738607287406921, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1068141907453537, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10018160194158554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08120772242546082, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07718721032142639, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.060071609914302826, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.051424190402030945, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04698094725608826, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.045862555503845215, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030462630093097687, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024932540953159332, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.024388780817389488, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02185956761240959, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02115488424897194, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01676352694630623, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01662791520357132, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015304519794881344, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011965424753725529, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10996176302433014, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10073181986808777, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09356263279914856, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0848357230424881, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.051284193992614746, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.045834872871637344, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06623008847236633, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060041457414627075, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.052298858761787415, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.045226458460092545, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04405396059155464, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03383677080273628, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028746556490659714, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024798860773444176, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02379159815609455, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017001638188958168, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013060290366411209, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012397843413054943, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011867552995681763, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011240771040320396, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009049395099282265, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008849197067320347, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007651995867490768, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006018898915499449, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09530160576105118, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08648163080215454, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07684509456157684, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06984582543373108, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04395124316215515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03723035380244255, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06177803874015808, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05597073957324028, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04504808783531189, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03871450945734978, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03824552521109581, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.031404584646224976, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02680739015340805, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.021265607327222824, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019778652116656303, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01567644067108631, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01121011283248663, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.01026697363704443, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010186262428760529, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009248313494026661, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008164681494235992, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007980932481586933, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0062258318066596985, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005192429292947054, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22283312678337097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19944006204605103, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18709352612495422, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16911160945892334, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10289248824119568, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09198305755853653, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12969358265399933, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11575833708047867, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10572423040866852, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08867950737476349, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08571898192167282, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0667557418346405, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05531768873333931, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.049360476434230804, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04786927253007889, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03351708874106407, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02530037797987461, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02416488155722618, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022387241944670677, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.021394824609160423, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01719697006046772, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015956494957208633, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014414956793189049, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00979910884052515, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.23514217138290405, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20280559360980988, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19160886108875275, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.16182473301887512, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10889755189418793, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0960594043135643, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12579098343849182, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1157328262925148, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.11138952523469925, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08531276881694794, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07876820117235184, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06458285450935364, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.055648863315582275, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05251171067357063, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.05174800381064415, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03244924545288086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.027513938024640083, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.026680126786231995, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022876225411891937, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.02236119657754898, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017319755628705025, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01759968139231205, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.01627437025308609, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012365806847810745, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.14625142514705658, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13744205236434937, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13415026664733887, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12196078151464462, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06926465034484863, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06599299609661102, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07786588370800018, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07179099321365356, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.06995923072099686, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0620274543762207, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05938716232776642, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03979003429412842, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03448539227247238, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.033350273966789246, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03308117017149925, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01996702328324318, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.017391996458172798, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.017149873077869415, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.015945490449666977, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.015782184898853302, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010680372826755047, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.010849066078662872, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010320953093469143, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007583298720419407, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2399349808692932, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22602584958076477, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2212757170200348, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.2012079358100891, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11364853382110596, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.10863851010799408, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12677966058254242, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11688641458749771, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11472145467996597, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10189954936504364, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09741388261318207, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06474559754133224, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.056027043610811234, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05455145612359047, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.0542110875248909, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032445020973682404, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02808866836130619, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02774565853178501, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02570238523185253, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02548525109887123, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01716693863272667, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016940206289291382, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.0166839100420475, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011229011230170727, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2098853439092636, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18487603962421417, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17366406321525574, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15400606393814087, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09594176709651947, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.0852605402469635, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11651524156332016, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10628291964530945, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.0991344079375267, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.080837681889534, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07696404308080673, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.059256941080093384, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.051099784672260284, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04640795290470123, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.0452117957174778, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030095065012574196, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024596845731139183, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02400391362607479, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021671563386917114, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.020910680294036865, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016550792381167412, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01642182283103466, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015021229162812233, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011715520173311234, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11030188202857971, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10058876872062683, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09315881133079529, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08447563648223877, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.051308609545230865, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04563109949231148, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06654596328735352, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06036579608917236, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.052510861307382584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04516196623444557, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04397178441286087, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0340312123298645, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02893434651196003, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024828819558024406, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.023766880854964256, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017129138112068176, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013101719319820404, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012414040975272655, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011897235177457333, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011236060410737991, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009169441647827625, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008929722011089325, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007730476092547178, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00608318904414773, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10294271260499954, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09336010366678238, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08293972164392471, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0754251629114151, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04754213988780975, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.040277037769556046, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06642818450927734, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.060529354959726334, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.048738330602645874, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04184591770172119, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04123460501432419, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.033732324838638306, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02893729880452156, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.023059912025928497, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02146603725850582, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016925835981965065, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012209828943014145, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.01119791530072689, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011101938784122467, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010107504203915596, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008875690400600433, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008738958276808262, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006876006722450256, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005786132998764515, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22048388421535492, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19641442596912384, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18306463956832886, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.16517794132232666, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10161953419446945, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.08987275511026382, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1300858110189438, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11567744612693787, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10453563928604126, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08708666265010834, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08442971110343933, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06655306369066238, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05520036444067955, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.0487518310546875, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04709254577755928, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.033379826694726944, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02501719817519188, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.023766860365867615, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02206951379776001, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.020976100116968155, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017082538455724716, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015938585624098778, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.014179624617099762, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009781052358448505, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.22095546126365662, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1905936896800995, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1788051724433899, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14524328708648682, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10339843481779099, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.09168832749128342, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12220777571201324, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11149511486291885, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10548698157072067, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08011937886476517, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07063788920640945, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06254670768976212, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05378152057528496, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05015096440911293, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.049221962690353394, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03147629648447037, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02663041092455387, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.025823796167969704, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02216685563325882, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.0215611569583416, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01704096421599388, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017628129571676254, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015826646238565445, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01284649595618248, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15919941663742065, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1497431993484497, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14623619616031647, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13297651708126068, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07538161426782608, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07186087965965271, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08456668257713318, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07804421335458755, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07614047080278397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06754198670387268, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06461623311042786, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04320870712399483, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0374407023191452, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03624648228287697, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.035963889211416245, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021644627675414085, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018825605511665344, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018561288714408875, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017246518284082413, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017071621492505074, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011494748294353485, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011630144901573658, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011104326695203781, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007988582365214825, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2441408932209015, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.230081245303154, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22524219751358032, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20474648475646973, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11559819430112839, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11054205149412155, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12878215312957764, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11886142194271088, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11664538085460663, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10370371490716934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09908103942871094, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06569153070449829, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.056904926896095276, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05541860684752464, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.055067870765924454, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03285488858819008, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02841995470225811, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028060054406523705, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025976981967687607, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.025754449889063835, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017198005691170692, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01695394143462181, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.016705447807908058, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010996097698807716, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.21675999462604523, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19205372035503387, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18108996748924255, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1609480381011963, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09933257848024368, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08899856358766556, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12025266885757446, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10940585285425186, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10240654647350311, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08419059962034225, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08020889759063721, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06110252067446709, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05251108855009079, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.0479699932038784, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04683011397719383, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030969535931944847, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.025317130610346794, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.024757293984293938, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022365590557456017, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.021643463522195816, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01698288321495056, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016712311655282974, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015505810268223286, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01181691326200962, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11218442022800446, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10278365015983582, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09579238295555115, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08691207319498062, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05230937898159027, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04690304771065712, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06705804169178009, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06077463924884796, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.053380127996206284, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04613402485847473, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04494491592049599, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03416117653250694, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029103370383381844, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025259017944335938, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.024277053773403168, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017142148688435555, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013242560438811779, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01259603537619114, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01201890129595995, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011405231431126595, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009065640158951283, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008865541778504848, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007696931716054678, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005949995946139097, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10195581614971161, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09231427311897278, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08345787227153778, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07569685578346252, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04701239615678787, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04061029851436615, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06358174979686737, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05801459774374962, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.04823722690343857, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04126638546586037, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.040418948978185654, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03225686401128769, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027664372697472572, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.022750787436962128, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.021450864151120186, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01617971621453762, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011954094283282757, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011117321439087391, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01081607025116682, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009995204396545887, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00845037680119276, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008302921429276466, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006748953368514776, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005446247756481171, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23555190861225128, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21348415315151215, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20271846652030945, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1831926554441452, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1095895916223526, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09975043684244156, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13353805243968964, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12052446603775024, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1119946837425232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09519392997026443, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09157679975032806, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06831196695566177, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0576154962182045, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05256574600934982, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05130768567323685, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03415916860103607, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0268724262714386, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025901712477207184, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023949719965457916, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.023114560171961784, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017509739845991135, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016656676307320595, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01532205194234848, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010269878432154655, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.217591792345047, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18704448640346527, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17560575902462006, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1441204994916916, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09982987493276596, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08849795162677765, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11832413077354431, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10834311693906784, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10275885462760925, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07768703252077103, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07089629769325256, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06052326783537865, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05211033672094345, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.0482783317565918, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04734092205762863, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.030367081984877586, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02544424496591091, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02458730712532997, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0211372971534729, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.020483974367380142, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016252664849162102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016622478142380714, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015036218799650669, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011771945282816887, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16536231338977814, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15554314851760864, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15194787085056305, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1380366086959839, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07831712067127228, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07464653253555298, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08791803568601608, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08099674433469772, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0790640339255333, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07010579109191895, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06714296340942383, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04487128183245659, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03886975347995758, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.037664152681827545, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.037376150488853455, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02255263738334179, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019566915929317474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019296059384942055, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017917437478899956, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01773597113788128, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012102308683097363, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012097691185772419, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01171506941318512, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008317114785313606, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2488020658493042, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23435337841510773, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22939328849315643, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20844939351081848, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.1178482323884964, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11264537274837494, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.131266251206398, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1211838647723198, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1189514622092247, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10559721291065216, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10085280984640121, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0670306533575058, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05804015323519707, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.056540463119745255, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05617374926805496, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0335550382733345, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.029009254649281502, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028644725680351257, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02648819237947464, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02626560628414154, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01766720600426197, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017328625544905663, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017164010554552078, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011281037703156471, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22226375341415405, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19674783945083618, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18537375330924988, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16507817804813385, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10175424814224243, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09100262820720673, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1225186362862587, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11220595240592957, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10498284548521042, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08629871904850006, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08226513117551804, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06250262260437012, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.053832635283470154, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.049081914126873016, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04788627102971077, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03151160106062889, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02579675242304802, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02522328309714794, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022769896313548088, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02202330343425274, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016964489594101906, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016910040751099586, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015384342521429062, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011804034002125263, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11979696154594421, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11013216525316238, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10277605801820755, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.0933145210146904, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05605807155370712, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.050346169620752335, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07128642499446869, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06498492509126663, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05709133297204971, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04955287277698517, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.048124901950359344, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.036389950662851334, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03119237720966339, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.027122192084789276, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02608533203601837, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018248308449983597, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014286679215729237, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.013600028119981289, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012995551340281963, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012354416772723198, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009632118977606297, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009624293074011803, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008216999471187592, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006577853113412857, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11028541624546051, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.10087373852729797, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0909341424703598, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.08265221118927002, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.05116065964102745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.0441729761660099, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06982025504112244, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06376586854457855, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05228058993816376, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04523393139243126, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.044420067220926285, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03553788363933563, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.030447108671069145, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.024766404181718826, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.023230332881212234, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017775025218725204, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012995264492928982, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012041871435940266, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011824293062090874, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010881436988711357, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.009294250048696995, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009063173085451126, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007359364069998264, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005918077193200588, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.24369852244853973, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2230430394411087, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2136492133140564, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.19329451024532318, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11401800811290741, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10509436577558517, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13626423478126526, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12322705239057541, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11621727794408798, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09983064979314804, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09588705003261566, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06952869892120361, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05891232565045357, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05466600880026817, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05360497161746025, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.034786876291036606, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.027905061841011047, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027059270069003105, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0249729435890913, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.024287421256303787, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01783568225800991, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016981706023216248, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015973567962646484, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01040789857506752, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.19467608630657196, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17131191492080688, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.16104736924171448, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1348172426223755, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09127122163772583, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08182020485401154, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10900788754224777, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09973972290754318, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09257114678621292, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07190728187561035, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06687090545892715, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05579103156924248, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.047829240560531616, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.043954942375421524, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04299391061067581, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.027919717133045197, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.022833600640296936, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02208798937499523, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.018961384892463684, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.018296049907803535, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014724990352988243, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0145567050203681, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013399706222116947, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009716043248772621, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16512469947338104, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1552041918039322, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1516067236661911, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13766242563724518, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07823630422353745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07454561442136765, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08763580024242401, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08087964355945587, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07899671047925949, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06995218247175217, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06689385324716568, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.044746074825525284, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03880283236503601, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.037604186683893204, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.037318579852581024, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022422125563025475, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01950686424970627, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01923765055835247, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017830591648817062, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017653966322541237, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011903641745448112, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011996807530522346, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011516914702951908, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008189506828784943, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.2519456446170807, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2372623234987259, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23224857449531555, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.21102692186832428, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11941710114479065, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11411257088184357, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13333113491535187, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12273355573415756, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.1205306202173233, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10692175477743149, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.1023101657629013, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06799102574586868, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0588342510163784, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05733982101082802, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05698588863015175, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03413550928235054, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.029498785734176636, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.029137032106518745, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026942653581500053, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02671757899224758, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01812550239264965, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01773495227098465, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017634782940149307, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011706787161529064, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22584789991378784, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20032960176467896, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18917427957057953, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.16863112151622772, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10346700996160507, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09282894432544708, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12443669140338898, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1137256920337677, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10673227161169052, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08804264664649963, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08381009846925735, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06306871026754379, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05449603497982025, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04984211176633835, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04868248105049133, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.031872477382421494, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.026104185730218887, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.0255314651876688, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023069817572832108, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.022319165989756584, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017239168286323547, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016972128301858902, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015709081664681435, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011688798666000366, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12565436959266663, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11594274640083313, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10872288048267365, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09847476333379745, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.0589301697909832, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.053320784121751785, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07434506714344025, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06770142912864685, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05994607135653496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.05210364609956741, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.05054550990462303, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.037881046533584595, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0324799083173275, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.028458522632718086, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.027437463402748108, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.019007833674550056, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01489074993878603, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.014211433008313179, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013531499542295933, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012895501218736172, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009958647191524506, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00987287424504757, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008541939780116081, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006605089176446199, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.11120964586734772, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.10199297964572906, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.09263473004102707, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0841052383184433, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.05170231685042381, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04501377046108246, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06944484263658524, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06364504992961884, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.052796997129917145, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04575767368078232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04474934563040733, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03519657254219055, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.03042827919125557, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02497148886322975, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.023528898134827614, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017628710716962814, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.013039077632129192, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012115994468331337, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011846727691590786, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010942041873931885, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0091661736369133, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008953971788287163, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007354307919740677, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005727020092308521, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.2452186793088913, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22479906678199768, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2155027538537979, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1948813498020172, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11464369297027588, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.1058836281299591, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13758143782615662, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12366386502981186, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11675673723220825, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.1004927009344101, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09644680470228195, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07024683803319931, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05911414325237274, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05493653938174248, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05390472710132599, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.035077255219221115, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.028038544580340385, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.027191778644919395, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02510158345103264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02443724498152733, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01804296486079693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01705366186797619, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016140621155500412, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010432195849716663, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.20850227773189545, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18186359107494354, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17222443222999573, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.1394156664609909, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09738145768642426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08732113987207413, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11415378749370575, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10351989418268204, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09944142401218414, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07596345990896225, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.0670575350522995, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.058138713240623474, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0497426800429821, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04689354822039604, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04619409888982773, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029236985370516777, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02432151883840561, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023643698543310165, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02000713348388672, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01951828971505165, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015603424981236458, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015215186402201653, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014605548232793808, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01024495530873537, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16934557259082794, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15908725559711456, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15537162125110626, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.14110097289085388, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.0803513303399086, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07654863595962524, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09013783931732178, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08309808373451233, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08113621175289154, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07178699225187302, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06857819855213165, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04607882350683212, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03990349918603897, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03866398707032204, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.038364965468645096, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023117324337363243, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020093943923711777, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01981450989842415, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018369223922491074, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01818779669702053, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012354173697531223, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01242885459214449, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011948809027671814, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00855899415910244, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.25488853454589844, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23984354734420776, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2347051054239273, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.21319788694381714, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12087038159370422, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11549661308526993, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13439242541790009, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12421994656324387, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12199267745018005, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10810069739818573, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10314762592315674, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06855641305446625, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05948533117771149, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05796077847480774, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.057611580938100815, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.034277841448783875, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.029679464176297188, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.029307512566447258, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027043195441365242, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.026815881952643394, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017851203680038452, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017624156549572945, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017341695725917816, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011343822814524174, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.22761040925979614, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20178255438804626, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19008582830429077, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.169702410697937, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10407497733831406, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09318220615386963, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1255543977022171, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11519418656826019, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10745014995336533, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08883528411388397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08466915041208267, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06398986279964447, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05517728999257088, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05010175332427025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04884114861488342, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03214804828166962, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.026175934821367264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.025549041107296944, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023154759779572487, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.022348254919052124, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0171828456223011, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.0169949010014534, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015516906045377254, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011559789068996906, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11956556886434555, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10983172059059143, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10222430527210236, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09254322201013565, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05599968507885933, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.050160862505435944, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07166346162557602, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06531864404678345, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05706649646162987, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0493919812142849, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04805462807416916, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03654852509498596, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.031350165605545044, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.027085108682513237, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02599060907959938, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0183405764400959, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014231416396796703, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01350815873593092, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012917844578623772, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012228898704051971, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009571333415806293, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009581122547388077, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008057977072894573, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006448492873460054, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10703013092279434, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09752926230430603, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08708211034536362, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07896272838115692, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0495661199092865, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04227042570710182, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06883584707975388, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06289839744567871, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05071664974093437, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.043708547949790955, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.042947836220264435, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.034870442003011703, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02997840940952301, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.023994266986846924, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.022386228665709496, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01744828000664711, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.012609485536813736, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.011589618399739265, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011446917429566383, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.010433473624289036, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.009174785576760769, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008884693495929241, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.007171977311372757, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005744534078985453, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23581856489181519, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2131485491991043, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2013947069644928, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.18173331022262573, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10958094149827957, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09908465296030045, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13598014414310455, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12177545577287674, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11216945201158524, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09496910125017166, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.0916658416390419, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06976042687892914, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05815589427947998, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05256136506795883, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05117220804095268, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03492499515414238, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.026880696415901184, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025789806619286537, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023874741047620773, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022956470027565956, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01790420524775982, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016716057434678078, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015372361056506634, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010224482975900173, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.20338481664657593, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18318665027618408, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.176656112074852, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14532648026943207, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09538476914167404, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08827226608991623, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10781723260879517, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09926442056894302, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09709298610687256, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0773458480834961, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06993874162435532, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05515936017036438, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04760374128818512, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.04582863673567772, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04539521411061287, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.027600599452853203, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.02363365888595581, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.023110482841730118, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.019974472001194954, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01968713290989399, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014475362375378609, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014417143538594246, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.013890267349779606, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00955972820520401, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.17299003899097443, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16241584718227386, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15859176218509674, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1439054310321808, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.08212155103683472, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07820713520050049, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09204479306936264, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08493796736001968, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.082929328083992, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07328996807336807, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06996925920248032, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04704703390598297, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04076601564884186, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03948824480175972, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03918827697634697, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02356349490582943, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.020472627133131027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02018905058503151, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018688980489969254, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018496757373213768, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01249330397695303, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012585047632455826, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.01207448448985815, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008570852689445019, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.25549179315567017, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2402162104845047, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2351655215024948, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.21341297030448914, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.12130487710237503, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.11580322682857513, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13516007363796234, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1247178241610527, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.12244705110788345, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1083955317735672, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10347011685371399, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06901095807552338, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.059769827872514725, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05822189897298813, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05785509943962097, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03455374762415886, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.029902806505560875, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.02953045442700386, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027257908135652542, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.027019405737519264, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01815716177225113, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017915258184075356, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017636463046073914, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011726513504981995, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23137956857681274, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20594966411590576, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19442638754844666, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17353597283363342, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10609742254018784, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09534408897161484, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12764523923397064, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11709500104188919, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10940949618816376, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09082799404859543, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08643035590648651, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0650697872042656, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05621398985385895, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05117955431342125, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04991012439131737, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03292669728398323, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.02688777633011341, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.02625882998108864, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023897230625152588, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.023089490830898285, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01784907653927803, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01762976497411728, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016226256266236305, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012249250896275043, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.12347149848937988, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11352568119764328, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10698164254426956, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.09701184183359146, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05796470865607262, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.052636679261922836, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07217317819595337, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06558632850646973, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.059000514447689056, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0510648675262928, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04943782091140747, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03685983642935753, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03143996000289917, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02799091301858425, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.027125954627990723, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0185091532766819, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.014641459099948406, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.014021697454154491, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013263458386063576, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.012712989933788776, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0097214225679636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009609443135559559, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.008455991744995117, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006459539756178856, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.10949043929576874, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1007816419005394, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.09389980137348175, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0849740281701088, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.0512068048119545, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04598483443260193, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0652322843670845, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05970275402069092, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.05209134891629219, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.045232586562633514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04378879442811012, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03305380046367645, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02853020653128624, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.024721717461943626, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.02374759502708912, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016592295840382576, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.01289486326277256, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.012241802178323269, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011692315340042114, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.011078953742980957, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00867669377475977, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008568890392780304, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0073593552224338055, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005631084553897381, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23104211688041687, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2119576781988144, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20319640636444092, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1836818903684616, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1084398478269577, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.10010217130184174, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12839674949645996, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11714249104261398, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1104716956615448, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09499990195035934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09100406616926193, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06546861678361893, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.055996373295784, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05199310928583145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.051017723977565765, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03268727287650108, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.026467079296708107, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025661004707217216, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023665674030780792, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02302449569106102, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016769403591752052, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01599832810461521, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015231747180223465, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00968947820365429, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.2081364542245865, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18013550341129303, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17020441591739655, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.14290852844715118, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09752151370048523, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.08703428506851196, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11321958154439926, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10370352864265442, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0995868369936943, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07553713023662567, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07068905979394913, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.058236368000507355, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.050087615847587585, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.047285642474889755, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04660243168473244, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029444001615047455, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.025189965963363647, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.02452397532761097, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020989365875720978, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.020548362284898758, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016152389347553253, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016639158129692078, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015279729850590229, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01229725033044815, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.16795997321605682, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15750008821487427, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15365928411483765, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.13931480050086975, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07980313152074814, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0758977010846138, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0896696001291275, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08276276290416718, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.08064629137516022, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07114449888467789, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06791985780000687, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04587840661406517, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.039764732122421265, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.038428205996751785, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03810626640915871, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02299620397388935, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01997886598110199, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.01968388259410858, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018231069669127464, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.018029801547527313, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01223289966583252, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012380778789520264, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011798864230513573, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00852987915277481, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.24801217019557953, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.233144149184227, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22806447744369507, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.20687270164489746, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.11787109822034836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1124686747789383, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1314791738986969, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12134826183319092, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11900662630796432, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10524175316095352, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.10042516887187958, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0672215074300766, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.058170050382614136, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05659637600183487, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05622505396604538, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.033651236444711685, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.02909892052412033, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.028719443827867508, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026493679732084274, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.02626597322523594, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017745446413755417, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017475178465247154, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.017212769016623497, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011478597298264503, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.23041701316833496, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20456433296203613, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19232697784900665, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.17163756489753723, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.10557666420936584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.09437593817710876, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12904219329357147, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1174306869506836, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.10897639393806458, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09019948542118073, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.08598333597183228, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06559576839208603, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05629194527864456, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.05093326047062874, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.049586791545152664, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03314448148012161, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.026776224374771118, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.026108287274837494, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023762892931699753, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02291189320385456, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018008600920438766, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017627999186515808, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.016239726915955544, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012240967713296413, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11484789848327637, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10542397201061249, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09697042405605316, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08797403424978256, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.05379639193415642, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04753723740577698, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07122927904129028, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06434879451990128, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05484512448310852, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04748493432998657, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04648180678486824, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03641435503959656, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03086746111512184, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.026080673560500145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.024826278910040855, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.018239427357912064, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013780212961137295, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012977289035916328, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012538379058241844, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011755154468119144, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00961507298052311, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009493762627243996, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007887966930866241, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006436014547944069, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09974455833435059, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09076916426420212, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0791095718741417, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07200805097818375, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04617327079176903, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03823903948068619, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06717640161514282, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.060978058725595474, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.047321829944849014, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.040873657912015915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.040522754192352295, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03393125906586647, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.029169650748372078, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02243197336792946, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.020533287897706032, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017055176198482513, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011855005286633968, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010679677128791809, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010813890025019646, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009627796709537506, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008891855366528034, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008648437447845936, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006577863357961178, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005564586725085974, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23516295850276947, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21174907684326172, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19910240173339844, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1795388162136078, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.10929422080516815, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09793423116207123, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13734307885169983, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1229572668671608, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.11200986057519913, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09449629485607147, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09131529927253723, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07046365737915039, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.058925025165081024, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05251876637339592, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.050937470048666, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03542424738407135, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02704021893441677, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.02584090083837509, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02404966950416565, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02300582453608513, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01822775974869728, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017241040244698524, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015409726649522781, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010890479199588299, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.22708113491535187, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19281569123268127, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1803952157497406, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.15256927907466888, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.10504996031522751, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0923689752817154, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12380506098270416, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11314652860164642, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.10788503289222717, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08050927519798279, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.07560627907514572, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06348112225532532, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.054344307631254196, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.05067337304353714, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04977373778820038, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.031843151897192, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.026601716876029968, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.025710945948958397, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02181166224181652, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.021195441484451294, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017064480111002922, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01717921905219555, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.015867380425333977, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012031559832394123, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15654458105564117, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1465766876935959, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14262214303016663, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12924803793430328, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07435522973537445, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.07047905772924423, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08395696431398392, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07748892903327942, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07517430186271667, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06613568961620331, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06315740197896957, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04295571893453598, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03723416104912758, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03580944240093231, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03546900674700737, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021524829789996147, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.018614862114191055, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018318763002753258, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01696755178272724, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.016751617193222046, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011429485864937305, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011572700925171375, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010958374477922916, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00795959122478962, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.22989918291568756, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21573176980018616, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2107011079788208, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.19097590446472168, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.10921651124954224, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.1039983406662941, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12219774723052979, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11286291480064392, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.11031775176525116, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09734898805618286, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.09286253154277802, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06249852478504181, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05412173271179199, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.05248439311981201, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.05210233852267265, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031319934874773026, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.027097703889012337, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.026720618829131126, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024674447253346443, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024428751319646835, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01652870886027813, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016467265784740448, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015974152833223343, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011008094996213913, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.209926575422287, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18528898060321808, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17346689105033875, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15415869653224945, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09622222185134888, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08527809381484985, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11792925745248795, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10776503384113312, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09943075478076935, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08159171044826508, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.07772467285394669, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06033932417631149, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.051894091069698334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.04661395773291588, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.04527271166443825, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03060942143201828, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024819539859890938, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.024148108437657356, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02200757898390293, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02116658352315426, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01686796359717846, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016787594184279442, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.015135303139686584, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012070629745721817, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11296364665031433, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10363525152206421, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09586747735738754, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08687084168195724, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.052655886858701706, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04691650718450546, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07017667591571808, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06232992187142372, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.053862202912569046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04657565802335739, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04553486779332161, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.035819072276353836, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029866447672247887, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.025487912818789482, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.02434350550174713, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017990991473197937, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.01340189017355442, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012676199898123741, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012200454249978065, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011488635092973709, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009426399134099483, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009110082872211933, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007675031200051308, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006114030256867409, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09909959137439728, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09134811162948608, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08276616781949997, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.07507380843162537, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04612899571657181, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.04017261415719986, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06329464912414551, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05731472373008728, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.047028131783008575, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04109064117074013, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.04025833308696747, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03217242658138275, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02741382084786892, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.022329343482851982, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.020957227796316147, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016133906319737434, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011664587073028088, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.01081513799726963, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01066257432103157, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009808910079300404, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008396126329898834, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008102606050670147, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006587101612240076, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005198964849114418, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.25334328413009644, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2337963730096817, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2242927998304367, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.20288878679275513, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.1193167194724083, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.11054381728172302, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.142295241355896, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12900829315185547, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.12142051756381989, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10503111779689789, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.10087062418460846, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07280049473047256, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.061757564544677734, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05727946385741234, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.05617816373705864, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.036530960351228714, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.0293173398822546, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.028445376083254814, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02637549676001072, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.025661012157797813, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01882186345756054, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.018007146194577217, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.016878550872206688, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011273661628365517, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1979614496231079, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1667785942554474, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15573900938034058, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.13864170014858246, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.09242524206638336, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.080609530210495, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10973996669054031, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09810473024845123, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.09459838271141052, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0723351389169693, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06904517114162445, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05652467906475067, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.047254182398319244, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.044644176959991455, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.04400847479701042, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.028342943638563156, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.023390034213662148, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.022657498717308044, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01943822391331196, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.01899341493844986, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015094508416950703, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014974990859627724, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.014146531000733376, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010531347244977951, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.15179403126239777, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14197082817554474, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1380159854888916, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12493594735860825, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07206378877162933, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.06823068112134933, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08151601999998093, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07528500258922577, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07288048416376114, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06401322036981583, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.061093006283044815, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04172021895647049, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03619830682873726, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03475165739655495, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.034396249800920486, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020910104736685753, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.01815195381641388, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.017850453034043312, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01654253900051117, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.01632475107908249, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011127959936857224, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011426652781665325, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.010642175562679768, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007993558421730995, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.20220381021499634, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.18954087793827057, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.18482954800128937, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.16754484176635742, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.09681946784257889, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.09205564856529236, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10875114798545837, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10031243413686752, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.0978083685040474, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08630497753620148, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.0824117437005043, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05615846440196037, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.049079690128564835, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.047537993639707565, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.04716865345835686, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.028358619660139084, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.026226146146655083, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.025903446599841118, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024288268759846687, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.024080198258161545, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015995437279343605, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018220756202936172, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.015510967001318932, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.014673241414129734, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.2100425511598587, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1862371563911438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17553846538066864, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.15486255288124084, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.09653548151254654, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.08651164174079895, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1170179545879364, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10640973597764969, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.09934656322002411, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08153320848941803, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.0770520567893982, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05932457000017166, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.051041897386312485, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.046578120440244675, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.045478325337171555, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029834363609552383, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.024566739797592163, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.024008722975850105, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021646879613399506, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.02093842253088951, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01601261831820011, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016187265515327454, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.014518968760967255, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01140529103577137, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10942678898572922, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10004673898220062, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09080938249826431, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08225816488265991, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.051150787621736526, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0444350391626358, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06985793262720108, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06270510703325272, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05222764238715172, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04501611739397049, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04430960491299629, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03570513054728508, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03015141747891903, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02485400065779686, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.023445790633559227, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017940843477845192, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.013195608742535114, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.012305679731070995, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011994653381407261, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.011120866052806377, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00947542954236269, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00929146446287632, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007555006537586451, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006324824411422014, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09786225855350494, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0890159159898758, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0768873393535614, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.0698358491063118, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04518791660666466, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03708744794130325, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0670720636844635, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06062251329421997, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.046377211809158325, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.039977651089429855, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.03974602371454239, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03410201892256737, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.028956566005945206, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.021980825811624527, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019998319447040558, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.017153499647974968, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011646955274045467, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.010433832183480263, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010638302192091942, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.009391332045197487, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008932353928685188, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008612682111561298, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.006421079859137535, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005553004331886768, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.22885695099830627, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20502614974975586, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19103924930095673, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.17231710255146027, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.106083944439888, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09403706341981888, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1362290233373642, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12169615924358368, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.10884593427181244, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09145588427782059, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.08895457535982132, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07020296901464462, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05824672058224678, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.05111221596598625, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.04928810894489288, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03539315611124039, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.02645305171608925, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025116892531514168, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02351514808833599, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.022319622337818146, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01830754056572914, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017256073653697968, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.01505353394895792, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011040076613426208, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.1754245162010193, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.15640783309936523, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15060074627399445, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.128059521317482, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.08196604251861572, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.0754886344075203, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09328975528478622, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08507637679576874, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.08328621834516525, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06662632524967194, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.06185607612133026, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04795331135392189, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.041017524898052216, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.03961574658751488, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.03928731381893158, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02404949441552162, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.020974326878786087, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.020577022805809975, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.017991911619901657, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.017775194719433784, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.012997996062040329, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.013535979203879833, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.012559572234749794, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009932121261954308, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.14613348245620728, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13672123849391937, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13313330709934235, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.12072916328907013, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06988288462162018, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.066216841340065, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07891792058944702, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07277127355337143, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07060376554727554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06217484548687935, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05945649743080139, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04084283858537674, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03561181202530861, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.03431728482246399, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.034004371613264084, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02065189741551876, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.0189052801579237, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.018640061840415, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017475683242082596, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.0172955933958292, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011666343547403812, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013137471862137318, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011262771673500538, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010534110479056835, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1510132998228073, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1414870321750641, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1380009651184082, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.1250990480184555, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.07223952561616898, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.0686165913939476, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08119947463274002, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07486885040998459, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.07295320183038712, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06430280953645706, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.06146566942334175, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04195944964885712, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03649057820439339, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.035315532237291336, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.03503255918622017, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02117588371038437, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.019263477995991707, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.019017241895198822, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017784278839826584, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.017611857503652573, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011868396773934364, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013143843039870262, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.011492797173559666, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010369645431637764, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.17465005815029144, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1558675915002823, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.14808034896850586, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.1291535645723343, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.08094026148319244, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.07352861016988754, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.09782074391841888, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.08719391375780106, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.08270568400621414, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.06804914772510529, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.06397432088851929, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.04945197328925133, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04238172993063927, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.03962462767958641, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.038934528827667236, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.025277579203248024, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.021768853068351746, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.021414292976260185, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.01939650997519493, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.01896320842206478, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.014511682093143463, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015204058028757572, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.013631673529744148, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011920841410756111, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.10622163116931915, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09685619920492172, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08785220235586166, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.07950682938098907, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.04953319579362869, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.04295690357685089, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0674840658903122, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060866087675094604, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.05066363513469696, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04353829473257065, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.04281485453248024, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.034472931176424026, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029236581176519394, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.024037819355726242, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.0226634219288826, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017295673489570618, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.012730593793094158, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.011851800605654716, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011553224176168442, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.010696390643715858, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009110292419791222, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008935170248150826, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.007257817778736353, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006024706643074751, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.09702502191066742, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08811890333890915, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07651286572217941, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.06944186985492706, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.04480211064219475, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.03706799075007439, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0663924440741539, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.059539228677749634, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.046001020818948746, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.039531409740448, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.039297208189964294, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03370159491896629, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.028349172323942184, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.02182171307504177, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.019961286336183548, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01689358428120613, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.011596852913498878, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.01045574713498354, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010568174533545971, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.00941626075655222, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008863795548677444, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008531225845217705, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.0065067969262599945, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005604125559329987, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.18896484375, "total_bits": 9181184.0, "err": 0.23791907727718353, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21077701449394226, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19457779824733734, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.72021484375, "total_bits": 11409408.0, "err": 0.1756177544593811, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.22021484375, "total_bits": 13506560.0, "err": 0.11005691438913345, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.68896484375, "total_bits": 15472640.0, "err": 0.09602507203817368, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14238159358501434, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.127810999751091, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.18896484375, "total_bits": 13375488.0, "err": 0.1134369820356369, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09398218244314194, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.656982421875, "total_bits": 15338496.0, "err": 0.09144990146160126, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07325556874275208, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06113177537918091, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.22021484375, "total_bits": 17700864.0, "err": 0.052972227334976196, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.31396484375, "total_bits": 18094080.0, "err": 0.050867367535829544, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0368383415043354, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.22021484375, "total_bits": 21895168.0, "err": 0.027391968294978142, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.37646484375, "total_bits": 22550528.0, "err": 0.025832340121269226, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.024152856320142746, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.72021484375, "total_bits": 23992320.0, "err": 0.02273949421942234, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018956094980239868, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01794395223259926, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.2191162109375, "total_bits": 26084864.0, "err": 0.015539372339844704, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01130509190261364, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1878662109375, "total_bits": 36706304.0, "err": 0.11231028288602829, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09916410595178604, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09497776627540588, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7191162109375, "total_bits": 45619200.0, "err": 0.08216572552919388, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2191162109375, "total_bits": 54007808.0, "err": 0.051072657108306885, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6878662109375, "total_bits": 61872128.0, "err": 0.046042703092098236, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06085621565580368, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05360368266701698, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1878662109375, "total_bits": 53483520.0, "err": 0.0519411563873291, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.041315749287605286, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.65643310546875, "total_bits": 61344768.0, "err": 0.039163392037153244, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.030620435252785683, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02746104821562767, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2191162109375, "total_bits": 70785024.0, "err": 0.02641395479440689, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3128662109375, "total_bits": 72357888.0, "err": 0.026157543063163757, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016202010214328766, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2191162109375, "total_bits": 87562240.0, "err": 0.016265111044049263, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3753662109375, "total_bits": 90183680.0, "err": 0.01601477339863777, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.014744848012924194, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7191162109375, "total_bits": 95950848.0, "err": 0.014607666060328484, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.010016234591603279, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.012966941110789776, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218841552734375, "total_bits": 104334848.0, "err": 0.009735913947224617, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011625419370830059, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.1296224743127823, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.12162785232067108, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1186537817120552, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.10738984495401382, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.06158532202243805, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.05853750929236412, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.06929770112037659, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06383230537176132, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.062178242951631546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05480451136827469, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.05230465531349182, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03543650731444359, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.030635206028819084, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.029623575508594513, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.029378041625022888, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.017733560875058174, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.015334339812397957, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.015107988379895687, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.013957837596535683, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.013804178684949875, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.009353616274893284, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.009420191869139671, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.009010479785501957, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.006379729136824608, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1876046316964284, "total_bits": 128456703.99999999, "err": 0.08579771965742111, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.08028317987918854, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.0782545655965805, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7188546316964284, "total_bits": 159651840.0, "err": 0.07078533619642258, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2188546316964284, "total_bits": 189011968.0, "err": 0.04069848358631134, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.6876046316964284, "total_bits": 216537088.0, "err": 0.03864951804280281, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.04588383063673973, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.04221785441040993, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1876046316964284, "total_bits": 187176960.0, "err": 0.041133224964141846, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.03615522012114525, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6563023158482144, "total_bits": 214699008.0, "err": 0.03452911600470543, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.023535868152976036, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.020343489944934845, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.218854631696429, "total_bits": 247732224.0, "err": 0.019664473831653595, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.312604631696429, "total_bits": 253237248.0, "err": 0.019505634903907776, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.011830596253275871, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.218854631696429, "total_bits": 306452480.0, "err": 0.010362852364778519, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375104631696429, "total_bits": 315627520.0, "err": 0.010212412104010582, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.009469813667237759, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.718854631696429, "total_bits": 335812608.0, "err": 0.009371397085487843, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.006433716043829918, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.006625191308557987, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.218776157924107, "total_bits": 365168128.0, "err": 0.006212243344634771, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.004793333355337381, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1789376395089284, "total_bits": 127947775.99999999, "err": 0.12177145481109619, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.11236367374658585, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.10746297985315323, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7235804966517856, "total_bits": 159929344.0, "err": 0.09324418008327484, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2235804966517856, "total_bits": 189289472.0, "err": 0.05695829913020134, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7146519252232144, "total_bits": 218125312.0, "err": 0.05293412134051323, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.06996972858905792, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.06203378364443779, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1789376395089284, "total_bits": 186668032.0, "err": 0.05789746716618538, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.049383196979761124, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6608973911830356, "total_bits": 214968832.0, "err": 0.04664077237248421, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.03592713922262192, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.03030954860150814, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.223580496651786, "total_bits": 248009728.0, "err": 0.028085807338356972, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.321794782366071, "total_bits": 253776896.0, "err": 0.027511540800333023, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0186019204556942, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.223580496651786, "total_bits": 306729984.0, "err": 0.015720341354608536, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339651925223214, "total_bits": 313545728.0, "err": 0.015453184954822063, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.014368661679327488, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.723580496651786, "total_bits": 336090112.0, "err": 0.014050650410354137, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.010957643389701843, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.011354854330420494, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.227770124162946, "total_bits": 365696256.0, "err": 0.010202922858297825, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.00919077917933464, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] } ], "last_module_idx": 66, "base_perplexity": 7.620468200387372 }